From 6c4647a58e976e5eb88b0e63dfd07dd0b3170df1 Mon Sep 17 00:00:00 2001 From: iammeghana Date: Mon, 19 Aug 2024 14:37:28 -0400 Subject: [PATCH 1/2] added utility functions --- book/chapters/gridmet.ipynb | 2177 ++++++++++++++++++----------------- 1 file changed, 1099 insertions(+), 1078 deletions(-) diff --git a/book/chapters/gridmet.ipynb b/book/chapters/gridmet.ipynb index 92ba6f2..9f9c318 100644 --- a/book/chapters/gridmet.ipynb +++ b/book/chapters/gridmet.ipynb @@ -30,1116 +30,1381 @@ }, { "cell_type": "markdown", - "id": "3a1c3a47d3070625", + "id": "9ba26855", "metadata": {}, "source": [ - "## 3.2.1.1 Setup and Variable Mapping\n", + "## 3.2.1.1 Create a GridMET to DEM Mapper\n", "\n", - "The following code snippet sets up the environment by importing necessary libraries, defining a workspace, and mapping variables.\n", + "Here we generate a mapping between the coordinates in a DEM (Digital Elevation Model) and the corresponding coordinates in a GridMET dataset, saving the result to a CSV file.\n", "\n", - "- `gridmet_var_mapping`: A dictionary that associates short-form variable names with their full descriptive names.\n", + "- `nc_file`: A NetCDF file containing GridMET data, from which latitude and longitude arrays are extracted.\n", + "- `western_us_dem_df`: A DataFrame containing DEM coordinates loaded from a CSV file.\n", + "- `target_csv_path`: A string representing the file path where the resulting mapping CSV will be saved.\n", + "- `latitudes`: A `numpy` array of latitude values extracted from the NetCDF file.\n", + "- `longitudes`: A `numpy` array of longitude values extracted from the NetCDF file.\n", + "- `get_gridmet_var_value(row)`: A function that finds the nearest GridMET coordinates for a given DEM coordinate and returns those coordinates along with their indices.\n", "\n", - "- The `colors` list represents a gradient of colors associated with specific ranges of values" + "Here we create a detailed mapping between DEM coordinates and GridMET 
coordinates, facilitating the integration of data from different sources. This is crucial for tasks like spatial analysis, where accurate alignment between datasets is required." ] }, { "cell_type": "code", - "execution_count": 59, - "id": "174c8d71", + "execution_count": 69, + "id": "6173cfa8", "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import numpy as np\n", - "import pandas as pd\n", - "import netCDF4 as nc\n", - "import urllib.request\n", - "from datetime import datetime, timedelta, date\n", - "import matplotlib.pyplot as plt\n", - "\n", - "work_dir = \"../data/gridmet_test_run\"\n", - "\n", - "gridmet_var_mapping = {\n", - " \"etr\": \"potential_evapotranspiration\",\n", - " \"pr\":\"precipitation_amount\",\n", - " \"rmax\":\"relative_humidity\",\n", - " \"rmin\":\"relative_humidity\",\n", - " \"tmmn\":\"air_temperature\",\n", - " \"tmmx\":\"air_temperature\",\n", - " \"vpd\":\"mean_vapor_pressure_deficit\",\n", - " \"vs\":\"wind_speed\",\n", - "}\n", - "\n", - "colors = [\n", - " (0.8627, 0.8627, 0.8627), # #DCDCDC - 0 - 1\n", - " (0.8627, 1.0000, 1.0000), # #DCFFFF - 1 - 2\n", - " (0.6000, 1.0000, 1.0000), # #99FFFF - 2 - 4\n", - " (0.5569, 0.8235, 1.0000), # #8ED2FF - 4 - 6\n", - " (0.4509, 0.6196, 0.8745), # #739EDF - 6 - 8\n", - " (0.4157, 0.4706, 1.0000), # #6A78FF - 8 - 10\n", - " (0.4235, 0.2784, 1.0000), # #6C47FF - 10 - 12\n", - " (0.5529, 0.0980, 1.0000), # #8D19FF - 12 - 14\n", - " (0.7333, 0.0000, 0.9176), # #BB00EA - 14 - 16\n", - " (0.8392, 0.0000, 0.7490), # #D600BF - 16 - 18\n", - " (0.7569, 0.0039, 0.4549), # #C10074 - 18 - 20\n", - " (0.6784, 0.0000, 0.1961), # #AD0032 - 20 - 30\n", - " (0.5020, 0.0000, 0.0000) # #800000 - > 30\n", - "]" + "def create_gridmet_to_dem_mapper(nc_file):\n", + " western_us_dem_df = pd.read_csv(western_us_coords)\n", + " # Check if the CSV already exists\n", + " target_csv_path = f'{work_dir}/gridmet_to_dem_mapper.csv'\n", + " if os.path.exists(target_csv_path):\n", + " print(f\"File 
{target_csv_path} already exists, skipping..\")\n", + " return\n", + " \n", + " # get the netcdf file and generate the csv file for every coordinate in the dem_template.csv\n", + " selected_date = datetime.strptime(test_start_date, \"%Y-%m-%d\")\n", + " # Read the NetCDF file\n", + " with nc.Dataset(nc_file) as nc_file:\n", + " \n", + " # Get the values at each coordinate using rasterio's sample function\n", + " latitudes = nc_file.variables['lat'][:]\n", + " longitudes = nc_file.variables['lon'][:]\n", + " \n", + " def get_gridmet_var_value(row):\n", + " # Perform your custom calculation here\n", + " gridmet_lat_index = find_nearest_index(latitudes, float(row[\"Latitude\"]))\n", + " gridmet_lon_index = find_nearest_index(longitudes, float(row[\"Longitude\"]))\n", + " return latitudes[gridmet_lat_index], longitudes[gridmet_lon_index], gridmet_lat_index, gridmet_lon_index\n", + " \n", + " # Use the apply function to apply the custom function to each row\n", + " western_us_dem_df[['gridmet_lat', 'gridmet_lon', \n", + " 'gridmet_lat_idx', 'gridmet_lon_idx',]] = western_us_dem_df.apply(lambda row: pd.Series(get_gridmet_var_value(row)), axis=1)\n", + " western_us_dem_df.rename(columns={\"Latitude\": \"dem_lat\", \n", + " \"Longitude\": \"dem_lon\"}, inplace=True)\n", + " \n", + " # Save the new converted AMSR to CSV file\n", + " western_us_dem_df.to_csv(target_csv_path, index=False)\n", + " \n", + " return western_us_dem_df" ] }, { "cell_type": "markdown", - "id": "6a0c2267", + "id": "e44f963a", "metadata": {}, "source": [ - "## 3.2.1.2 Map Values to Colors\n", - "\n", - "Here we generate a color mapping for a given column of data based on specified or automatically calculated value ranges. 
It returns the color mapping and the value ranges used.\n", + "## 3.2.1.2 Extracts NetCDF Data by Coordinates and Variable\n", "\n", - "- `df_col` (required): The data column from a DataFrame to map to colors.\n", - "- `value_ranges` (optional): A list of value ranges to determine the mapping of data values to colors. If not provided, the function calculates the ranges automatically.\n", - "- `map_value_to_color` takes a value and maps it to a color based on the `new_value_ranges`.\n", - "- It iterates through the ranges, assigning a color to the value based on which range it falls into.\n", - "- If the value is greater than all the defined ranges, the last color in the list is used.\n", + "The following code extracts specific meteorological data from a NetCDF file based on provided coordinates and a variable name, and returns the data in a pandas DataFrame.\n", "\n", - "we get the `color_mapping` (list of colors corresponding to each value in the data column) and `new_value_ranges` (the calculated or provided value ranges)." 
+ "- `mapper_df`: A DataFrame containing the mapping between DEM coordinates and GridMET coordinates.\n", + "- `latitudes`: A `numpy` array of latitude values from the NetCDF file.\n", + "- `longitudes`: A `numpy` array of longitude values from the NetCDF file.\n", + "- `var_col`: The data array for the selected variable from the NetCDF file.\n", + "- `get_gridmet_var_value(row)`: Here we extract the variable value for each coordinate from the NetCDF data.\n", + "- `Latitude`, `Longitude`: The final latitude and longitude columns in the returned DataFrame.\n", + "- Here we automate the process of extracting specific meteorological data from a NetCDF file based on geospatial coordinates, allowing for detailed analysis of climate variables at specific locations.\n", + "- By leveraging a pre-generated mapping (from DEM to GridMET coordinates `3.1.10`), the function efficiently retrieves the data for the exact locations of interest, making it highly useful in spatial analysis and modeling tasks.\n" ] }, { "cell_type": "code", - "execution_count": 60, - "id": "47f03b97", + "execution_count": 71, + "id": "c09fdd0c", "metadata": {}, "outputs": [], "source": [ - "def create_color_maps_with_value_range(df_col, value_ranges=None):\n", - " if value_ranges == None:\n", - " max_value = df_col.max()\n", - " min_value = df_col.min()\n", - " if min_value < 0:\n", - " min_value = 0\n", - " step_size = (max_value - min_value) / 12\n", - "\n", - " # Create 10 periods\n", - " new_value_ranges = [min_value + i * step_size for i in range(12)]\n", - " # Define your custom function to map data values to colors\n", - " def map_value_to_color(value):\n", - " # Iterate through the value ranges to find the appropriate color index\n", - " for i, range_max in enumerate(new_value_ranges):\n", - " if value <= range_max:\n", - " return colors[i]\n", - "\n", - " # If the value is greater than the largest range, return the last color\n", - " return colors[-1]\n", + "def 
get_nc_csv_by_coords_and_variable(nc_file, var_name, target_date=test_start_date):\n", + " \n", + " create_gridmet_to_dem_mapper(nc_file)\n", + " \t\n", + " mapper_df = pd.read_csv(f'{work_dir}/gridmet_to_dem_mapper.csv')\n", + " \n", + " # get the netcdf file and generate the csv file for every coordinate in the dem_template.csv\n", + " selected_date = datetime.strptime(target_date, \"%Y-%m-%d\")\n", + " # Read the NetCDF file\n", + " with nc.Dataset(nc_file) as nc_file:\n", + " # Get a list of all variables in the NetCDF file\n", + " variables = nc_file.variables.keys()\n", + " \n", + " # Get the values at each coordinate using rasterio's sample function\n", + " latitudes = nc_file.variables['lat'][:]\n", + " longitudes = nc_file.variables['lon'][:]\n", + " day = nc_file.variables['day'][:]\n", + " long_var_name = gridmet_var_mapping[var_name]\n", + " var_col = nc_file.variables[long_var_name][:]\n", "\n", - " # Map predicted_swe values to colors using the custom function\n", - " color_mapping = [map_value_to_color(value) for value in df_col.values]\n", - " return color_mapping, new_value_ranges" + " \n", + " # Calculate the day of the year\n", + " day_of_year = selected_date.timetuple().tm_yday\n", + " day_index = day_of_year - 1\n", + " \n", + " def get_gridmet_var_value(row):\n", + " # Perform your custom calculation here\n", + " lat_index = int(row[\"gridmet_lat_idx\"])\n", + " lon_index = int(row[\"gridmet_lon_idx\"])\n", + " var_value = var_col[day_index, lat_index, lon_index]\n", + " \n", + " return var_value\n", + " \n", + " # Use the apply function to apply the custom function to each row\n", + " mapper_df[var_name] = mapper_df.apply(get_gridmet_var_value, axis=1)\n", + " \n", + " # drop useless columns\n", + " mapper_df = mapper_df[[\"dem_lat\", \"dem_lon\", var_name]]\n", + " mapper_df.rename(columns={\"dem_lat\": \"Latitude\",\n", + " \"dem_lon\": \"Longitude\"}, inplace=True)\n", + " return mapper_df" ] }, { "cell_type": "markdown", - "id": 
"341d9f43", + "id": "6c685758", "metadata": {}, "source": [ - "## 3.2.1.3 Retrive the Current Year\n", + "## 3.2.1.3 Converts GridMET NetCDF Files to CSV\n", "\n", - "The following code snippet retrives the current year from the system's date and time." + "Here we converts NetCDF files containing GridMET meteorological data into CSV files for a specific date. It processes each NetCDF file in a specified directory and extracts relevant data based on the date provided.\n", + "\n", + "- `selected_date`: A `datetime` object representing the `target_date`.\n", + "- `generated_csvs`: A list that stores the paths to the CSV files that are generated during the function's execution.\n", + "- `res_csv`: The path where the resulting CSV file will be saved.\n", + "\n", + "- Here we automate the process of converting multiple NetCDF files into CSV format, making it easier to handle and analyze the data outside of specialized NetCDF tools.\n", + "\n", + "- We extract data for a specific variable from a NetCDF file by matching coordinates from a DEM template CSV file. This enables us to create a DataFrame containing the variable values alongside the corresponding coordinates. \n", + "\n", + "- By doing so, we can effectively extract and analyze meteorological data for specific geographical locations, aiding in various environmental and geographical studies, as well as modeling endeavors." 
] }, { "cell_type": "code", - "execution_count": 61, - "id": "92e47a12", + "execution_count": 72, + "id": "46b63faa", "metadata": {}, "outputs": [], "source": [ - "def get_current_year():\n", - " \"\"\"\n", - " Get the current year.\n", + "def turn_gridmet_nc_to_csv(target_date=test_start_date):\n", + " \n", + " selected_date = datetime.strptime(target_date, \"%Y-%m-%d\")\n", + " generated_csvs = []\n", + " for root, dirs, files in os.walk(gridmet_folder_name):\n", + " for file_name in files:\n", + " \n", + " if str(selected_date.year) in file_name and file_name.endswith(\".nc\"):\n", + " print(f\"Checking file: {file_name}\")\n", + " var_name = get_var_from_file_name(file_name)\n", + " res_csv = f\"../data/gridmet_test_run/testing_output/{str(selected_date.year)}_{var_name}_{target_date}.csv\"\n", "\n", - " Returns:\n", - " int: The current year.\n", - " \"\"\"\n", - " now = datetime.now()\n", - " current_year = now.year\n", - " return current_year" + " if os.path.exists(res_csv):\n", + " #os.remove(res_csv)\n", + " # print(f\"{res_csv} already exists. 
Skipping..\")\n", + " print(f\"File {os.path.basename(res_csv)} exists\")\n", + " generated_csvs.append(res_csv)\n", + " continue\n", + "\n", + " # Perform operations on each file here\n", + " netcdf_file_path = os.path.join(root, file_name)\n", + " print(\"Processing file:\", netcdf_file_path)\n", + " file_name = get_file_name_from_path(netcdf_file_path)\n", + "\n", + " df = get_nc_csv_by_coords_and_variable(netcdf_file_path, \n", + " var_name, target_date)\n", + " df.replace('--', pd.NA, inplace=True)\n", + " df.to_csv(res_csv, index=False)\n", + " print(\"gridmet var saved: \", res_csv)\n", + " generated_csvs.append(res_csv)\n", + " \n", + " return generated_csvs " ] }, { "cell_type": "markdown", - "id": "c84bca49", + "id": "77512189", "metadata": {}, "source": [ - "## 3.2.1.4 Removes Specific Files in a Folder\n", + "## 3.2.1.4 Plot GridMET Data\n", "\n", - "We remove all files within the specified folder.\n", + "The following code snippet generates a scatter plot of GridMET data for a specified date and saves the resulting image to a file. 
It processes data from a CSV file and creates a visual representation of the variable \"pr\" (precipitation) on a geographical grid.\n", "\n", - "- `folder_path`: A string representing the directory where files are to be removed.\n", - "- `current_year`: An integer representing the current year, used to filter which files should be deleted.\n", - "- `files`: A list containing the names of all items (files and directories) within the specified folder.\n", - "- `file_path`: A string representing the full path to each file in the folder, constructed by joining folder_path and the file name.\n", + "- `target_date`: The date for which the data is plotted, formatted as \"YYYY-MM-DD\".\n", + "- `selected_date`: A `datetime` object representing the `target_date`.\n", + "- `var_name`: A string representing the name of the variable to be plotted, set to \"pr\" (precipitation).\n", + "- `test_csv`: The file path to the CSV file containing the data to be plotted.\n", + "- `gridmet_var_df`: A DataFrame containing the loaded data from the CSV file.\n", + "- `colormaplist`: A list of colors corresponding to the value ranges in the data.\n", + "- `value_ranges`: The value ranges used to map the colors in the plot.\n", + "- `res_png_path`: The file path where the resulting plot image will be saved.\n", "\n", - "- The function then loops through each item in the `files` list. For each item:\n", - " - `file_path = os.path.join(folder_path, file)` constructs the full path to the file by combining the folder path and the file name.\n", - " - `if os.path.isfile(file_path) and str(current_year) in file_path and file_path.endswith(\".nc\"):` checks if the item is a file (not a directory), if the file name contains the current year as a substring, and if the file has a `.nc` extension (indicating a NetCDF file).\n", - " - If all these conditions are met, the file is deleted using `os.remove(file_path)`, and a message is printed to confirm the deletion." 
+ "- **Visualization:** This function provides a visual representation of the GridMET data, specifically focusing on precipitation (\"pr\") values. Visualization helps in understanding spatial patterns and distributions in the data, making it easier to interpret and analyze.\n", + "- **Data Communication:** By saving the plot as an image, the function allows the results to be easily shared, included in reports, or further analyzed.\n", + "\n", + "We convert GridMET NetCDF files to CSV format for a specified date. We iterate through files in the GridMET folder, checking for files corresponding to the selected date. For each matching file, we extract the variable name and generate a CSV file containing the data. If the CSV file already exists, we skip the process. This process facilitates easy access and analysis of meteorological data for a specific date." ] }, { "cell_type": "code", - "execution_count": 62, - "id": "1b96876c", + "execution_count": 73, + "id": "62f7b586", "metadata": {}, "outputs": [], "source": [ - "def remove_files_in_folder(folder_path, current_year):\n", - " \"\"\"\n", - " Remove all files in a specified folder.\n", - "\n", - " Parameters:\n", - " folder_path (str): Path to the folder to remove files from.\n", - " \"\"\"\n", - " # Get a list of files in the folder\n", - " files = os.listdir(folder_path)\n", + "def plot_gridmet(target_date=test_start_date):\n", + " selected_date = datetime.strptime(target_date, \"%Y-%m-%d\")\n", + " var_name = \"pr\"\n", + " test_csv = f\"../data/gridmet_test_run/testing_output/{str(selected_date.year)}_{var_name}_{target_date}.csv\"\n", + " gridmet_var_df = pd.read_csv(test_csv)\n", + " gridmet_var_df.replace('--', pd.NA, inplace=True)\n", + " gridmet_var_df.dropna(inplace=True)\n", + " gridmet_var_df['pr'] = pd.to_numeric(gridmet_var_df['pr'], errors='coerce')\n", + " \n", + " colormaplist, value_ranges = create_color_maps_with_value_range(gridmet_var_df[var_name])\n", + " \n", + " # Create a scatter plot\n", + 
" plt.scatter(gridmet_var_df[\"Longitude\"].values, \n", + " gridmet_var_df[\"Latitude\"].values, \n", + " label='Pressure', \n", + " color=colormaplist, \n", + " marker='o')\n", "\n", - " # Loop through the files and remove them\n", - " for file in files:\n", - " file_path = os.path.join(folder_path, file)\n", - " if os.path.isfile(file_path) and str(current_year) in file_path and file_path.endswith(\".nc\"):\n", - " os.remove(file_path)\n", - " print(f\"Deleted file: {file_path}\")" + " # Add labels and a legend\n", + " plt.xlabel('X-axis')\n", + " plt.ylabel('Y-axis')\n", + " plt.title('Scatter Plot Example')\n", + " plt.legend()\n", + " \n", + " res_png_path = f\"../data/gridmet_test_run/testing_output/{str(selected_date.year)}_{var_name}_{target_date}.png\"\n", + " plt.savefig(res_png_path)\n", + " print(f\"test image is saved at {res_png_path}\")" ] }, { "cell_type": "markdown", - "id": "770f22a3", + "id": "c27997ec", "metadata": {}, "source": [ - "## 3.2.1.5 Download File from a URL\n", + "## 3.2.1.5 Prepare Folder and Generating Year List\n", + "The code snippet prepares a directory for storing GridMET NetCDF files and determines the relevant years based on a specified target date. 
It also checks if existing files cover the selected date and removes them if necessary to ensure up-to-date data.\n", "\n", - "Here we download a file from a given URL and saves it to a specified location.\n", + "- `target_date`: The date for which the data is being prepared, formatted as \"YYYY-MM-DD\".\n", + "- `selected_date`: A `datetime` object representing the `target_date`.\n", + "- `past_october_1`: A `datetime` object representing October 1st of either the current year or the previous year, depending on the `selected_date`.\n", + "- `year_list`: A list of years that are relevant to the `selected_date`, used for data processing.\n", + "- `gridmet_folder_name`: The directory where the GridMET NetCDF files are stored.\n", + "- `nc_file`: The file path to the NetCDF file for the `tmmx` variable of the current year.\n", + "- `ifremove`: A boolean flag indicating whether the existing files should be removed based on the date coverage.\n", "\n", - "- `url`: A string representing the URL from which the file is to be downloaded.\n", - " - `target_file_path`: A string representing the path where the downloaded file should be saved.\n", + "- **Folder Preparation:** Ensuring that the necessary directory exists before proceeding with file operations is crucial for organizing and managing data effectively.\n", + "- **Data Integrity:** By checking whether existing files cover the required date range and removing them if they do not, the function ensures that the data used in the project is up-to-date and accurate.\n", + "- **Year Selection:** The `year_list` is essential for determining which years' data should be processed, ensuring that the analysis covers the appropriate time span.\n", "\n", - "- `with urllib.request.urlopen(url) as response:` opens a connection to the provided URL.\n", - " - `file_content = response.read()` reads the contents of the file from the URL." + "We plot GridMET meteorological data for a specific variable and date. 
We read the data from a corresponding CSV file and preprocess it, ensuring valid numerical values. Then, we create a scatter plot, mapping the variable values to geographic coordinates. The color of each point on the plot represents the magnitude of the variable value. Finally, we save the plot as a PNG image for further analysis and visualization." ] }, { "cell_type": "code", - "execution_count": 63, - "id": "d7c28a1a", + "execution_count": 74, + "id": "1c643f91", "metadata": {}, "outputs": [], "source": [ - "def download_file(url, target_file_path, variable):\n", - " \"\"\"\n", - " Download a file from a URL and save it to a specified location.\n", + "def prepare_folder_and_get_year_list(target_date=test_start_date):\n", + " # Check if the folder exists, if not, create it\n", + " if not os.path.exists(gridmet_folder_name):\n", + " os.makedirs(gridmet_folder_name)\n", "\n", - " Parameters:\n", - " url (str): URL of the file to download.\n", - " target_file_path (str): Path where the downloaded file should be saved.\n", - " variable (str): Name of the meteorological variable being downloaded.\n", - " \"\"\"\n", - " try:\n", - " with urllib.request.urlopen(url) as response:\n", - " print(f\"Downloading {url}\")\n", - " file_content = response.read()\n", - " save_path = target_file_path\n", - " with open(save_path, 'wb') as file:\n", - " file.write(file_content)\n", - " print(f\"File downloaded successfully and saved as: {os.path.basename(save_path)}\")\n", - " except Exception as e:\n", - " print(f\"An error occurred while downloading the file: {str(e)}\")" + " selected_date = datetime.strptime(target_date, \"%Y-%m-%d\")\n", + " if selected_date.month < 10:\n", + " past_october_1 = datetime(selected_date.year - 1, 10, 1)\n", + " else:\n", + " past_october_1 = datetime(selected_date.year, 10, 1)\n", + " year_list = [selected_date.year, past_october_1.year]\n", + "\n", + " # Remove any existing files in the folder\n", + " if selected_date.year == 
datetime.now().year:\n", + " # check if the current year's netcdf contains the selected date\n", + " # get etr netcdf and read\n", + " nc_file = f\"{gridmet_folder_name}/tmmx_{selected_date.year}.nc\"\n", + " ifremove = False\n", + " if os.path.exists(nc_file):\n", + " with nc.Dataset(nc_file) as ncd:\n", + " day = ncd.variables['day'][:]\n", + " # Calculate the day of the year\n", + " day_of_year = selected_date.timetuple().tm_yday\n", + " day_index = day_of_year - 1\n", + " if len(day) <= day_index:\n", + " ifremove = True\n", + " \n", + " if ifremove:\n", + " print(\"The current year netcdf has new data. Redownloading..\")\n", + " remove_files_in_folder(gridmet_folder_name, selected_date.year) # only redownload when the year is the current year\n", + " else:\n", + " print(\"The existing netcdf already covers the selected date. Avoid downloading..\")\n", + " return year_list" ] }, { "cell_type": "markdown", - "id": "2525baae", + "id": "ccfe1332", "metadata": {}, "source": [ - "## 3.2.1.6 Downloads Specific Meteorological Variables\n", - "\n", - "Here we download specific meteorological variables from the GridMET climatology dataset for a list of years provided as input.\n", + "## 3.2.1.6 Adds a Cumulative Column\n", "\n", - "- `year_list`: A list of years for which the meteorological data is to be downloaded.\n", - "- `base_metadata_url` is a string that stores the base URL from where the meteorological data files will be downloaded.\n", - "- `variables_list` is a list containing the short names of the meteorological variables to be downloaded, such as `tmmn` (minimum temperature), `tmmx` (maximum temperature), `pr` (precipitation), `vpd` (vapor pressure deficit), `etr` (evapotranspiration), `rmax` (maximum relative humidity), `rmin` (minimum relative humidity), `vs` (wind speed).\n", + "Here we add a new column to a DataFrame that contains the cumulative sum of the values in an existing column.\n", "\n", - "- The function loops through each variable in 
`variables_list`.\n", - "- For each variable, it further loops through each year in `year_list`." - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "b73d3c85", - "metadata": {}, - "outputs": [], - "source": [ - "def download_gridmet_of_specific_variables(year_list):\n", - " \"\"\"\n", - " Download specific meteorological variables from the GridMET climatology dataset.\n", - " \"\"\"\n", - " # Make a directory to store the downloaded files\n", + "- `df`: The pandas DataFrame that contains the data to which the cumulative sum column will be added.\n", + "- `column_name`: The name of the column in the DataFrame for which the cumulative sum is to be calculated.\n", "\n", - " base_metadata_url = \"http://www.northwestknowledge.net/metdata/data/\"\n", - " variables_list = ['tmmn', 'tmmx', 'pr', 'vpd', 'etr', 'rmax', 'rmin', 'vs']\n", + "- **Data Enrichment:** Adding a cumulative sum column is useful for understanding trends over time, such as total precipitation over a period, cumulative sales, or accumulated values in any time series data.\n", + "- **Ease of Analysis:** By including the cumulative sum directly in the DataFrame, the function simplifies further analysis and visualization tasks that might require cumulative data.\n", "\n", - " for var in variables_list[:3]:\n", - " for y in year_list:\n", - " download_link = base_metadata_url + var + '_' + '%s' % y + '.nc'\n", - " target_file_path = os.path.join(gridmet_folder_name, var + '_' + '%s' % y + '.nc')\n", - " if not os.path.exists(target_file_path):\n", - " download_file(download_link, target_file_path, var)\n", - " else:\n", - " print(f\"File {os.path.basename(target_file_path)} exists\")" - ] - }, - { - "cell_type": "markdown", - "id": "455b8ece", - "metadata": {}, - "source": [ - "## 3.2.1.7 Extract File Name from File Path\n", "\n", - "Here we extracts the file name from a given file path.\n", - "- `file_path`: A string representing the full path to a file.\n", - "- `file_name = 
os.path.basename(file_path)` uses the `os.path.basename()` function to extract the file name from the complete file path. The `basename()` function returns the last component of the path, which is the file name." + "We prepare the folder structure for storing GridMET data and obtain a list of relevant years based on the target date. This process ensures that the necessary directory exists for data storage and determines the appropriate years for data retrieval without delving into technical details." ] }, { "cell_type": "code", - "execution_count": 65, - "id": "82adf8a9", + "execution_count": 75, + "id": "1621c7c1", "metadata": {}, "outputs": [], "source": [ - "def get_file_name_from_path(file_path):\n", - " # Get the file name from the file path\n", - " file_name = os.path.basename(file_path)\n", - " return file_name" + "def add_cumulative_column(df, column_name):\n", + " df[f'cumulative_{column_name}'] = df[column_name].sum()\n", + " return df" ] }, { "cell_type": "markdown", - "id": "f0c5e17f", + "id": "100e461b", "metadata": {}, "source": [ - "## 3.2.1.8 Extract Variable Name from File Name\n", - "\n", - "The code snippet extracts the variable name from a given file name, assuming the file name follows a specific format.\n", + "## 3.2.1.7 Prepare Cumulative History CSVs\n", "\n", - "- `file_name`: A string representing the name of the file from which the variable name will be extracted.\n", - "- `var_name = str(file_name.split('_')[0])` splits the file name at the underscore and takes the first part (index `0`), which is expected to be the variable name. The `str()` function ensures that `var_name` is treated as a string." 
- ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "2704d40c", - "metadata": {}, - "outputs": [], - "source": [ - "def get_var_from_file_name(file_name):\n", - " # Assuming the file name format is \"tmmm_year.csv\"\n", - " var_name = str(file_name.split('_')[0])\n", - " return var_name" - ] - }, - { - "cell_type": "markdown", - "id": "dea2dcf9", - "metadata": {}, - "source": [ - "## 3.2.1.9 Extracts Coordinates from a CSV File\n", + "Here we generate cumulative history CSVs for a specified target date. We process GridMET data by traversing the date range from the previous October 1st to the target date, downloading the necessary data, converting it to CSV, and calculating cumulative values for specific meteorological variables. The results are saved in new CSV files.\n", "\n", - "Here we extracts geographical coordinates (longitude and latitude) from a CSV file and returns them as a list of tuples.\n", + "- `target_date`: The target date for generating cumulative history CSVs.\n", + "- `selected_date`: The `datetime` object representing the `target_date`.\n", + "- `past_october_1`: A `datetime` object representing October 1st of the current or previous year.\n", + "- `generated_csvs`: A list of paths to the CSV files generated for the specific date.\n", + "- `cumulative_target_path`: The file path where the cumulative CSV will be saved.\n", + "- `date_keyed_objects`: A dictionary holding CSV file paths keyed by date.\n", + "- `force`: A boolean flag indicating whether to force regeneration of cumulative CSVs.\n", "\n", - "- `coordinates`: A list initialized as empty and used to store tuples of longitude and latitude values extracted from the CSV file.\n", - "- `df`: A DataFrame created by loading the CSV file using `pandas`. 
def prepare_cumulative_history_csvs(target_date=test_start_date, force=False):
    """
    Write one gap-filled CSV per GridMET variable for ``target_date``.

    The function makes sure the GridMET NetCDF files covering ``target_date``
    are downloaded, converts them to per-variable CSVs for that day, and for
    every generated CSV writes a sibling ``<csv>_cumulative.csv`` holding the
    columns ``Latitude``, ``Longitude`` and the variable itself.

    Parameters:
    - target_date (str, optional): Day to process, formatted 'YYYY-MM-DD'.
      Defaults to the module-level ``test_start_date``.
    - force (bool, optional): If True, regenerate output files even when they
      already exist. Default is False.

    Returns:
    None

    Raises:
    ValueError: if ``target_date`` is not a valid 'YYYY-MM-DD' date.

    NOTE(review): despite the name, no accumulation since the previous
    October 1 is performed here (the ``cumulative_<var>`` column was already
    commented out); the output is a single-day snapshot. The output file name
    is kept as ``<csv>_cumulative.csv`` so existing consumers keep working.
    """
    # strptime doubles as input validation; the normalised string is what the
    # per-day CSV file names are built from.
    selected_date = datetime.strptime(target_date, "%Y-%m-%d")
    print(selected_date)
    target_date_str = selected_date.strftime('%Y-%m-%d')

    download_gridmet_of_specific_variables(
        prepare_folder_and_get_year_list(target_date=target_date)
    )

    # Previously a hard-coded "2024-08-11" was processed here regardless of
    # the argument; the requested date is used instead.
    print('Processing date:', target_date_str)
    generated_csvs = turn_gridmet_nc_to_csv(target_date=target_date_str) or []
    print("Processing complete for the date:", target_date_str)

    # One generated CSV per GridMET variable.
    for single_csv in generated_csvs:
        print(f"creating cumulative for {single_csv}")

        cumulative_target_path = f"{single_csv}_cumulative.csv"
        print("cumulative_target_path = ", cumulative_target_path)

        if os.path.exists(cumulative_target_path) and not force:
            print(f"{cumulative_target_path} already exists, skipping..")
            continue

        # File names look like "<year>_<var>_<date>.csv"; the variable is the
        # second underscore-separated token.
        file_name = os.path.splitext(os.path.basename(single_csv))[0]
        var_name = file_name.split('_')[1]
        print(f"Found variable name {var_name}")

        day_df = pd.read_csv(single_csv)
        print(day_df.head())

        # Masked cells are exported as "--"; coerce them to NaN first.
        day_df[var_name] = pd.to_numeric(day_df[var_name], errors='coerce')

        # NOTE(review): missing values are replaced by 0, as before. For
        # temperature variables that is 0 K, which skews statistics; confirm
        # this is what downstream code expects.
        filled_data = day_df[['Latitude', 'Longitude', var_name]].fillna(0)
        print("Finished correctly ", filled_data.head())

        print(filled_data.shape)
        filled_data.to_csv(cumulative_target_path, index=False)
        print(f"new df is saved to {cumulative_target_path}")
        print(filled_data.describe())
+ "- `homedir = os.path.expanduser('~')`: Expands the tilde (`~`) to the full path of the user's home directory.\n", + "- `work_dir`: Defines the main working directory for the project, where all data processing will take place.\n", + "- `gridmet_folder_name`: Specifies the folder within the working directory where the GridMET climatology data will be stored.\n", + "- `western_us_coords`: Points to a CSV file containing the coordinates for the western U.S., derived from a DEM file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "d202cb61", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The existing netcdf already covers the selected date. Avoid downloading..\n", + "File tmmn_2024.nc exists\n", + "File tmmn_2023.nc exists\n", + "File tmmx_2024.nc exists\n", + "File tmmx_2023.nc exists\n", + "File pr_2024.nc exists\n", + "File pr_2023.nc exists\n", + "Checking file: tmmn_2024.nc\n", + "File 2024_tmmn_2024-08-13.csv exists\n", + "Checking file: tmmx_2024.nc\n", + "File 2024_tmmx_2024-08-13.csv exists\n", + "Checking file: pr_2024.nc\n", + "File 2024_pr_2024-08-13.csv exists\n", + "test image is saved at ../data/gridmet_test_run/testing_output/2024_pr_2024-08-13.png\n", + "2024-08-13 00:00:00\n", + "The existing netcdf already covers the selected date. 
Avoid downloading..\n", + "File tmmn_2024.nc exists\n", + "File tmmn_2023.nc exists\n", + "File tmmx_2024.nc exists\n", + "File tmmx_2023.nc exists\n", + "File pr_2024.nc exists\n", + "File pr_2023.nc exists\n", + "Processing date: 2024-08-11\n", + "Checking file: tmmn_2024.nc\n", + "File 2024_tmmn_2024-08-11.csv exists\n", + "Checking file: tmmx_2024.nc\n", + "File 2024_tmmx_2024-08-11.csv exists\n", + "Checking file: pr_2024.nc\n", + "File 2024_pr_2024-08-11.csv exists\n", + "Processing complete for the date: 2024-08-11\n", + "creating cumulative for ../data/gridmet_test_run/testing_output/2024_tmmn_2024-08-11.csv\n", + "cumulative_target_path = ../data/gridmet_test_run/testing_output/2024_tmmn_2024-08-11.csv_cumulative.csv\n", + "Found variable name tmmn\n", + " Latitude Longitude tmmn\n", + "0 49.0 -125.000 --\n", + "1 49.0 -124.964 --\n", + "2 49.0 -124.928 --\n", + "3 49.0 -124.892 --\n", + "4 49.0 -124.856 --\n", + "Finished correctly Latitude Longitude tmmn date\n", + "0 49.0 -125.000 0.0 2024-08-11\n", + "1 49.0 -124.964 0.0 2024-08-11\n", + "2 49.0 -124.928 0.0 2024-08-11\n", + "3 49.0 -124.892 0.0 2024-08-11\n", + "4 49.0 -124.856 0.0 2024-08-11\n", + "(462204, 3)\n", + "new df is saved to ../data/gridmet_test_run/testing_output/2024_tmmn_2024-08-11.csv_cumulative.csv\n", + " Latitude Longitude tmmn\n", + "count 462204.000000 462204.00000 462204.000000\n", + "mean 37.030000 -112.52600 193.127659\n", + "std 6.921275 7.21226 135.027899\n", + "min 25.060000 -125.00000 0.000000\n", + "25% 31.036000 -118.77200 0.000000\n", + "50% 37.030000 -112.52600 283.600000\n", + "75% 43.024000 -106.28000 288.100000\n", + "max 49.000000 -100.05200 310.900000\n", + "creating cumulative for ../data/gridmet_test_run/testing_output/2024_tmmx_2024-08-11.csv\n", + "cumulative_target_path = ../data/gridmet_test_run/testing_output/2024_tmmx_2024-08-11.csv_cumulative.csv\n", + "Found variable name tmmx\n", + " Latitude Longitude tmmx\n", + "0 49.0 -125.000 --\n", + "1 49.0 
-124.964 --\n", + "2 49.0 -124.928 --\n", + "3 49.0 -124.892 --\n", + "4 49.0 -124.856 --\n", + "Finished correctly Latitude Longitude tmmx date\n", + "0 49.0 -125.000 0.0 2024-08-11\n", + "1 49.0 -124.964 0.0 2024-08-11\n", + "2 49.0 -124.928 0.0 2024-08-11\n", + "3 49.0 -124.892 0.0 2024-08-11\n", + "4 49.0 -124.856 0.0 2024-08-11\n", + "(462204, 3)\n", + "new df is saved to ../data/gridmet_test_run/testing_output/2024_tmmx_2024-08-11.csv_cumulative.csv\n", + " Latitude Longitude tmmx\n", + "count 462204.000000 462204.00000 462204.000000\n", + "mean 37.030000 -112.52600 203.360382\n", + "std 6.921275 7.21226 142.170743\n", + "min 25.060000 -125.00000 0.000000\n", + "25% 31.036000 -118.77200 0.000000\n", + "50% 37.030000 -112.52600 298.800000\n", + "75% 43.024000 -106.28000 304.500000\n", + "max 49.000000 -100.05200 322.200000\n", + "creating cumulative for ../data/gridmet_test_run/testing_output/2024_pr_2024-08-11.csv\n", + "cumulative_target_path = ../data/gridmet_test_run/testing_output/2024_pr_2024-08-11.csv_cumulative.csv\n", + "Found variable name pr\n", + " Latitude Longitude pr\n", + "0 49.0 -125.000 --\n", + "1 49.0 -124.964 --\n", + "2 49.0 -124.928 --\n", + "3 49.0 -124.892 --\n", + "4 49.0 -124.856 --\n", + "Finished correctly Latitude Longitude pr date\n", + "0 49.0 -125.000 0.0 2024-08-11\n", + "1 49.0 -124.964 0.0 2024-08-11\n", + "2 49.0 -124.928 0.0 2024-08-11\n", + "3 49.0 -124.892 0.0 2024-08-11\n", + "4 49.0 -124.856 0.0 2024-08-11\n", + "(462204, 3)\n", + "new df is saved to ../data/gridmet_test_run/testing_output/2024_pr_2024-08-11.csv_cumulative.csv\n", + " Latitude Longitude pr\n", + "count 462204.000000 462204.00000 462204.000000\n", + "mean 37.030000 -112.52600 0.883495\n", + "std 6.921275 7.21226 2.777075\n", + "min 25.060000 -125.00000 0.000000\n", + "25% 31.036000 -118.77200 0.000000\n", + "50% 37.030000 -112.52600 0.000000\n", + "75% 43.024000 -106.28000 0.000000\n", + "max 49.000000 -100.05200 45.400000\n" + ] + }, + { + "data": { + 
"image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "gridmet_folder_name = \"../data/gridmet_test_run/gridmet_climatology\"\n", + "western_us_coords = \"../data/dem_file.tif.csv\"\n", + "\n", + "# Run the download function\n", + "download_gridmet_of_specific_variables(prepare_folder_and_get_year_list())\n", + "turn_gridmet_nc_to_csv()\n", + "plot_gridmet()\n", + "\n", + "# prepare testing data with cumulative variables\n", + "prepare_cumulative_history_csvs(force=True)" + ] + }, + { + "cell_type": "markdown", + "id": "68f2bea0", + "metadata": {}, + "source": [ + "## 3.2.2 Merging Meteorological Data for Comprehensive Analysis\n", + "\n", + "In this chapter, we do\n", + "- `Data Collection:` We fetch gridMET climatology data for various meteorological variables (e.g., temperature, precipitation) and multiple years. \n", "\n", - "Here we create a detailed mapping between DEM coordinates and GridMET coordinates, facilitating the integration of data from different sources. This is crucial for tasks like spatial analysis, where accurate alignment between datasets is required." + "- `Data Processing:` After downloading, we extract relevant data for specific geographical locations corresponding to weather stations.\n", + "\n", + "- `Data Integration:` We merge similar variables obtained from different years into separate CSV files. We then combine all variables together into a single comprehensive dataset for further analysis and modeling tasks." 
def get_gridmet_variable(file_name):
    """
    Extract one GridMET variable at every station and save it as a CSV.

    The output is written to ``<gridmet_save_location>/<nc stem>.csv``. If
    that file already exists nothing is read or written.

    Parameters:
    - file_name (str): Path to a GridMET NetCDF file. The first data variable
      in the file is the one extracted.

    Returns:
    None

    Relies on the module-level ``stations`` DataFrame (columns ``latitude``
    and ``longitude``) and ``gridmet_save_location``.
    """
    print(f"reading values from {file_name}")

    # Check for existing output before touching the NetCDF file so a re-run
    # does not open (and leak) a dataset just to return immediately.
    csv_file = f'{gridmet_save_location}/{Path(file_name).stem}.csv'
    if os.path.exists(csv_file):
        print(f"The file '{csv_file}' exists.")
        return

    station_frames = []
    # The context manager closes the underlying file handle even on error.
    with xr.open_dataset(file_name) as ds:
        var_to_extract = list(ds.keys())
        print(var_to_extract)
        var_name = var_to_extract[0]

        for _, row in stations.iterrows():
            lat = row['latitude']
            lon = row['longitude']

            # Nearest grid cell to the station; lat/lon are then overwritten
            # with the station's own coordinates so rows can be joined back
            # to the station table.
            subset_data = ds.sel(lat=lat, lon=lon, method='nearest')
            subset_data['lat'] = lat
            subset_data['lon'] = lon

            converted_df = subset_data.to_dataframe().reset_index(drop=False)
            # 'crs' is projection metadata, not a measurement; tolerate
            # files that do not carry it.
            converted_df = converted_df.drop(columns='crs', errors='ignore')
            station_frames.append(converted_df)

    # Concatenate once instead of growing a DataFrame inside the loop.
    leading = ['day', 'lat', 'lon', var_name]
    if station_frames:
        result_df = pd.concat(station_frames, ignore_index=True)
        ordered = [c for c in leading if c in result_df.columns]
        ordered += [c for c in result_df.columns if c not in ordered]
        result_df = result_df[ordered]
    else:
        result_df = pd.DataFrame(columns=leading)

    print("got result_df : ", result_df.head())
    result_df.to_csv(csv_file, index=False)
    print(f'completed extracting data for {file_name}')
from different years. The merged data is saved as a single CSV file for each variable.\n", + "\n", + "- `file_groups = {}`: Initializes an empty dictionary to store grouped files.\n", + "- For each file, we split off the extension and break the base name at the underscore (`_`) into a variable name and a suffix (normally the year), then group the `.csv` files by variable name.\n", + "- Files are therefore grouped on the part of the filename before the underscore, so that all files for the same variable end up together.\n", + "\n", + "- We start by listing all the files in the specified `gridmet_save_location` directory.\n", + "- We then group the files by their base names (e.g., `tmmn`, `pr`), keeping only `.csv` files whose name splits into exactly two underscore-separated parts.\n", + "- For each group with more than one file (i.e., the same variable from different years), the function reads at most the first 5 files into pandas DataFrames and concatenates them row-wise into a single DataFrame.\n", + "- The merged DataFrame is saved as a new CSV file named with the base name followed by `_merged.csv`." 
def merge_similar_variables_from_different_years(save_location=None, max_files=5):
    """
    Concatenate the yearly CSVs of each GridMET variable into one file.

    Files named ``<var>_<year>.csv`` are grouped by ``<var>``; every group
    with more than one file is concatenated row-wise (years in ascending
    order) and written to ``<var>_merged.csv`` in the same directory.

    Parameters:
    - save_location (str, optional): Directory holding the yearly CSVs.
      Defaults to the module-level ``gridmet_save_location``.
    - max_files (int or None, optional): Upper bound on the number of yearly
      files merged per variable. Default is 5 (the historical limit); pass
      None to merge every year found.

    Returns:
    None
    """
    if save_location is None:
        save_location = gridmet_save_location

    file_groups = {}
    # Sorted so the row order of the merged file does not depend on the
    # arbitrary order os.listdir returns.
    for filename in sorted(os.listdir(save_location)):
        stem, ext = os.path.splitext(filename)
        parts = stem.split('_')
        # Require a numeric year: without this, "<var>_merged.csv" left by a
        # previous run matches the pattern and is merged into itself,
        # duplicating rows on every re-run.
        if ext == '.csv' and len(parts) == 2 and parts[1].isdigit():
            file_groups.setdefault(parts[0], []).append(filename)

    for var_name, file_list in file_groups.items():
        if len(file_list) < 2:
            continue

        selected = file_list[:max_files]
        frames = [
            pd.read_csv(os.path.join(save_location, filename))
            for filename in selected
        ]
        merged_df = pd.concat(frames, ignore_index=True)

        merged_filename = f"{var_name}_merged.csv"
        merged_df.to_csv(os.path.join(save_location, merged_filename), index=False)
        print(f"Merged {selected} into {merged_filename}")
def merge_all_variables_together(save_location=None, output_csv=None):
    """
    Combine every ``<var>_merged.csv`` into one wide CSV.

    Each per-variable file is read, the value columns that share a name
    across variables are made unique (``relative_humidity`` for rmin/rmax,
    ``air_temperature`` for tmmn/tmmx get a ``_<var>`` suffix), and the
    frames are placed side by side. Repeated columns such as the coordinate
    and day columns are kept once.

    Parameters:
    - save_location (str, optional): Directory holding the ``*_merged.csv``
      files. Defaults to the module-level ``gridmet_save_location``.
    - output_csv (str, optional): Path of the combined CSV. Defaults to the
      module-level ``final_merged_csv``.

    Returns:
    None

    Side effects: a per-variable file is rewritten in place when its columns
    had to be renamed or cleaned, so later readers see the unique names.

    NOTE(review): frames are joined by row position, which presumes every
    per-variable file lists the same stations and days in the same order --
    confirm against how the yearly files are produced.
    """
    if save_location is None:
        save_location = gridmet_save_location
    if output_csv is None:
        output_csv = final_merged_csv

    # Variables whose NetCDF value column name collides with another one.
    shared_column = {
        'rmin': 'relative_humidity',
        'rmax': 'relative_humidity',
        'tmmn': 'air_temperature',
        'tmmx': 'air_temperature',
    }
    suffix = '_merged.csv'
    output_abs = os.path.abspath(output_csv)

    # Take every merged file, in a stable order, rather than an arbitrary
    # two-entry slice of the directory listing.
    merged_names = sorted(
        name for name in os.listdir(save_location)
        if name.endswith(suffix)
        and os.path.abspath(os.path.join(save_location, name)) != output_abs
    )

    frames = []
    for name in merged_names:
        path = os.path.join(save_location, name)
        var_name = name[:-len(suffix)]
        df = pd.read_csv(path)
        changed = False

        # Earlier runs wrote these files with the index included, leaving
        # "Unnamed: N" columns behind; drop them.
        stray = df.columns.str.startswith('Unnamed:')
        if stray.any():
            df = df.loc[:, ~stray]
            changed = True

        collide = shared_column.get(var_name)
        if collide in df.columns:
            df = df.rename(columns={collide: f'{collide}_{var_name}'})
            changed = True

        if changed:
            # index=False keeps re-runs from adding a new index column.
            df.to_csv(path, index=False)
        frames.append(df)

    if not frames:
        print(f"No *_merged.csv files found in {save_location}, nothing to merge")
        return

    merged_df = pd.concat(frames, axis=1)
    merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]
    merged_df.to_csv(output_csv, index=False)
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "33d38173", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "downloading http://www.northwestknowledge.net/metdata/data/tmmn_2021.nc\n", + "downloading http://www.northwestknowledge.net/metdata/data/tmmx_2021.nc\n", + "downloading http://www.northwestknowledge.net/metdata/data/pr_2021.nc\n", + "downloading http://www.northwestknowledge.net/metdata/data/vpd_2021.nc\n", + "downloading http://www.northwestknowledge.net/metdata/data/etr_2021.nc\n", + "downloading http://www.northwestknowledge.net/metdata/data/rmax_2021.nc\n", + "downloading http://www.northwestknowledge.net/metdata/data/rmin_2021.nc\n", + "downloading http://www.northwestknowledge.net/metdata/data/vs_2021.nc\n", + "reading values from ../data/gridmet_test_run/gridmet_climatology/rmin_2021.nc\n", + "['relative_humidity']\n", + "The file '../data/gridmet_test_run/gridmet_climatology/rmin_2021.csv' exists.\n", + "reading values from ../data/gridmet_test_run/gridmet_climatology/tmmn_2021.nc\n", + "['air_temperature']\n", + "The file '../data/gridmet_test_run/gridmet_climatology/tmmn_2021.csv' exists.\n", + "reading values from ../data/gridmet_test_run/gridmet_climatology/etr_2021.nc\n", + "['potential_evapotranspiration']\n", + "The file '../data/gridmet_test_run/gridmet_climatology/etr_2021.csv' exists.\n", + "reading values from ../data/gridmet_test_run/gridmet_climatology/etr_2020.nc\n", + "['potential_evapotranspiration']\n", + "The file '../data/gridmet_test_run/gridmet_climatology/etr_2020.csv' exists.\n", + "reading values from ../data/gridmet_test_run/gridmet_climatology/rmin_2020.nc\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/lib/python3.10/site-packages/xarray/backends/plugins.py:80: RuntimeWarning: Engine 'cfgrib' loading failed:\n", + "Cannot find the ecCodes library\n", + " warnings.warn(f\"Engine {name!r} loading 
failed:\\n{ex}\", RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['relative_humidity']\n", + "The file '../data/gridmet_test_run/gridmet_climatology/rmin_2020.csv' exists.\n", + "Merged ['tmmn_2020.csv', 'tmmn_2021.csv', 'tmmn_merged.csv'] into tmmn_merged.csv\n", + "Merged ['vs_merged.csv', 'vs_2020.csv', 'vs_2021.csv'] into vs_merged.csv\n", + "Merged ['rmax_merged.csv', 'rmax_2020.csv', 'rmax_2021.csv'] into rmax_merged.csv\n", + "Merged ['etr_merged.csv', 'etr_2020.csv', 'etr_2021.csv'] into etr_merged.csv\n", + "Merged ['vpd_merged.csv', 'vpd_2020.csv', 'vpd_2021.csv'] into vpd_merged.csv\n", + "Merged ['tmmx_2021.csv', 'tmmx_2020.csv', 'tmmx_merged.csv'] into tmmx_merged.csv\n", + "Merged ['pr_2021.csv', 'pr_2020.csv', 'pr_merged.csv'] into pr_merged.csv\n", + "Merged ['rmin_2021.csv', 'rmin_merged.csv', 'rmin_2020.csv'] into rmin_merged.csv\n" + ] + } + ], + "source": [ + "download_gridmet_climatology()\n", + "\n", + "nc_files = get_files_in_directory()\n", + "for nc in nc_files[:5]:\n", + " get_gridmet_variable(nc)\n", + "merge_similar_variables_from_different_years()\n", + "merge_all_variables_together()" ] }, { "cell_type": "markdown", - "id": "43f58a20", + "id": "3b84623c", "metadata": {}, "source": [ - "## 3.2.1.12 Calculate specific Operation Day\n", + "## Utility Functions for GridMET Climatology Data Downloader" + ] + }, + { + "cell_type": "markdown", + "id": "fba2568a", + "metadata": {}, + "source": [ + "## 3.2.1.1 Setup and Variable Mapping\n", "\n", - "Here we calculate the date exactly three days before the current date and returns it as a formatted string.\n", + "The following code snippet sets up the environment by importing necessary libraries, defining a workspace, and mapping variables.\n", "\n", - "- `current_date`: A `datetime` object representing the current date and time.\n", - "- `three_days_ago`: A `datetime` object representing the date three days before the current date.\n", - "- 
`three_days_ago_string`: A string representing the date three days ago, formatted as \"YYYY-MM-DD\".\n", - "- `test_start_date`: A string that stores the returned value from `get_operation_day()`, representing the operation day used in other parts of the code." + "- `gridmet_var_mapping`: A dictionary that associates short-form variable names with their full descriptive names.\n", + "\n", + "- The `colors` list represents a gradient of colors associated with specific ranges of values" ] }, { "cell_type": "code", - "execution_count": 70, - "id": "6dad59c8", + "execution_count": 1, + "id": "501f1e93", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-08-13\n" - ] - } - ], + "outputs": [], "source": [ - "from datetime import datetime, timedelta\n", - "def get_operation_day():\n", - " # Get the current date and time\n", - " current_date = datetime.now()\n", - "\n", - " # Calculate three days ago\n", - " three_days_ago = current_date - timedelta(days=3)\n", + "## Utility Functions\n", "\n", - " # Format the date as a string\n", - " three_days_ago_string = three_days_ago.strftime(\"%Y-%m-%d\")\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "import netCDF4 as nc\n", + "import urllib.request\n", + "from datetime import datetime, timedelta, date\n", + "import matplotlib.pyplot as plt\n", "\n", - " print(three_days_ago_string)\n", + "work_dir = \"../data/gridmet_test_run\"\n", "\n", - " return three_days_ago_string\n", + "gridmet_var_mapping = {\n", + " \"etr\": \"potential_evapotranspiration\",\n", + " \"pr\":\"precipitation_amount\",\n", + " \"rmax\":\"relative_humidity\",\n", + " \"rmin\":\"relative_humidity\",\n", + " \"tmmn\":\"air_temperature\",\n", + " \"tmmx\":\"air_temperature\",\n", + " \"vpd\":\"mean_vapor_pressure_deficit\",\n", + " \"vs\":\"wind_speed\",\n", + "}\n", "\n", - "test_start_date = get_operation_day()" + "colors = [\n", + " (0.8627, 0.8627, 0.8627), # #DCDCDC - 0 - 1\n", 
+ " (0.8627, 1.0000, 1.0000), # #DCFFFF - 1 - 2\n", + " (0.6000, 1.0000, 1.0000), # #99FFFF - 2 - 4\n", + " (0.5569, 0.8235, 1.0000), # #8ED2FF - 4 - 6\n", + " (0.4509, 0.6196, 0.8745), # #739EDF - 6 - 8\n", + " (0.4157, 0.4706, 1.0000), # #6A78FF - 8 - 10\n", + " (0.4235, 0.2784, 1.0000), # #6C47FF - 10 - 12\n", + " (0.5529, 0.0980, 1.0000), # #8D19FF - 12 - 14\n", + " (0.7333, 0.0000, 0.9176), # #BB00EA - 14 - 16\n", + " (0.8392, 0.0000, 0.7490), # #D600BF - 16 - 18\n", + " (0.7569, 0.0039, 0.4549), # #C10074 - 18 - 20\n", + " (0.6784, 0.0000, 0.1961), # #AD0032 - 20 - 30\n", + " (0.5020, 0.0000, 0.0000) # #800000 - > 30\n", + "]\n" ] }, { "cell_type": "markdown", - "id": "e44f963a", + "id": "b80cd848", "metadata": {}, "source": [ - "## 3.2.1.13 Extracts NetCDF Data by Coordinates and Variable\n", + "## 3.2.1.2 Map Values to Colors\n", "\n", - "The following code extracts specific meteorological data from a NetCDF file based on provided coordinates and a variable name, and returns the data in a pandas DataFrame.\n", + "Here we generate a color mapping for a given column of data based on specified or automatically calculated value ranges. 
It returns the color mapping and the value ranges used.\n", "\n", - "- `mapper_df`: A DataFrame containing the mapping between DEM coordinates and GridMET coordinates.\n", - "- `latitudes`: A `numpy` array of latitude values from the NetCDF file.\n", - "- `longitudes`: A `numpy` array of longitude values from the NetCDF file.\n", - "- `var_col`: The data array for the selected variable from the NetCDF file.\n", - "- `get_gridmet_var_value(row)`: Here we extract the variable value for each coordinate from the NetCDF data.\n", - "- `Latitude`, `Longitude`: The final latitude and longitude columns in the returned DataFrame.\n", - "- Here we automate the process of extracting specific meteorological data from a NetCDF file based on geospatial coordinates, allowing for detailed analysis of climate variables at specific locations.\n", - "- By leveraging a pre-generated mapping (from DEM to GridMET coordinates `3.1.10`), the function efficiently retrieves the data for the exact locations of interest, making it highly useful in spatial analysis and modeling tasks.\n" + "- `df_col` (required): The data column from a DataFrame to map to colors.\n", + "- `value_ranges` (optional): A list of value ranges to determine the mapping of data values to colors. If not provided, the function calculates the ranges automatically.\n", + "- `map_value_to_color` takes a value and maps it to a color based on the `new_value_ranges`.\n", + "- It iterates through the ranges, assigning a color to the value based on which range it falls into.\n", + "- If the value is greater than all the defined ranges, the last color in the list is used.\n", + "\n", + "we get the `color_mapping` (list of colors corresponding to each value in the data column) and `new_value_ranges` (the calculated or provided value ranges)." 
def create_color_maps_with_value_range(df_col, value_ranges=None, palette=None):
    """
    Map every value of a data column to a color.

    Parameters:
        df_col: Numeric data column. The code calls ``.min()``, ``.max()`` and
            ``.values`` on it (e.g. a pandas Series).
        value_ranges (list, optional): Upper bounds of the color buckets, in
            the order they should be tested. When omitted, 12 evenly spaced
            bounds are derived from the data.
        palette (list, optional): Colors indexed by bucket position. Defaults
            to the module-level ``colors`` list.

    Returns:
        tuple: ``(color_mapping, new_value_ranges)`` where ``color_mapping``
        holds one color per entry of ``df_col.values`` and
        ``new_value_ranges`` is the list of bounds that was used (the provided
        one, or the calculated one).
    """
    if palette is None:
        # Resolved at call time so the module-level gradient stays the default.
        palette = colors

    if value_ranges is None:
        max_value = df_col.max()
        min_value = df_col.min()
        if min_value < 0:
            # Clamp the lower bound: negative values all land in the first bucket.
            min_value = 0
        step_size = (max_value - min_value) / 12

        # 12 upper bounds: min, min + step, ..., min + 11 * step
        new_value_ranges = [min_value + i * step_size for i in range(12)]
    else:
        # Caller-supplied bounds are used as given. Without this branch the
        # name was never bound and the function raised as soon as
        # value_ranges was passed.
        new_value_ranges = value_ranges

    last_index = len(palette) - 1

    def map_value_to_color(value):
        # The first bound that is >= value decides the bucket.
        for i, range_max in enumerate(new_value_ranges):
            if value <= range_max:
                # Clamp so that more bounds than colors cannot overrun the palette.
                return palette[min(i, last_index)]

        # Above every bound (or not comparable, e.g. NaN): use the last color.
        return palette[-1]

    color_mapping = [map_value_to_color(value) for value in df_col.values]
    return color_mapping, new_value_ranges
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3f196690", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The history saving thread hit an unexpected error (OperationalError('unable to open database file')).History will not be written to the database.\n" + ] + } + ], + "source": [ + "def get_current_year():\n", + " \"\"\"\n", + " Get the current year.\n", "\n", - "- `selected_date`: A `datetime` object representing the `target_date`.\n", - "- `generated_csvs`: A list that stores the paths to the CSV files that are generated during the function's execution.\n", - "- `res_csv`: The path where the resulting CSV file will be saved.\n", + " Returns:\n", + " int: The current year.\n", + " \"\"\"\n", + " now = datetime.now()\n", + " current_year = now.year\n", + " return current_year" + ] + }, + { + "cell_type": "markdown", + "id": "6bb255e8", + "metadata": {}, + "source": [ + "## 3.2.1.4 Removes Specific Files in a Folder\n", "\n", - "- Here we automate the process of converting multiple NetCDF files into CSV format, making it easier to handle and analyze the data outside of specialized NetCDF tools.\n", + "We remove all files within the specified folder.\n", "\n", - "- We extract data for a specific variable from a NetCDF file by matching coordinates from a DEM template CSV file. This enables us to create a DataFrame containing the variable values alongside the corresponding coordinates. 
\n", + "- `folder_path`: A string representing the directory where files are to be removed.\n", + "- `current_year`: An integer representing the current year, used to filter which files should be deleted.\n", + "- `files`: A list containing the names of all items (files and directories) within the specified folder.\n", + "- `file_path`: A string representing the full path to each file in the folder, constructed by joining folder_path and the file name.\n", "\n", - "- By doing so, we can effectively extract and analyze meteorological data for specific geographical locations, aiding in various environmental and geographical studies, as well as modeling endeavors." + "- The function then loops through each item in the `files` list. For each item:\n", + " - `file_path = os.path.join(folder_path, file)` constructs the full path to the file by combining the folder path and the file name.\n", + " - `if os.path.isfile(file_path) and str(current_year) in file_path and file_path.endswith(\".nc\"):` checks if the item is a file (not a directory), if the file name contains the current year as a substring, and if the file has a `.nc` extension (indicating a NetCDF file).\n", + " - If all these conditions are met, the file is deleted using `os.remove(file_path)`, and a message is printed to confirm the deletion." 
def remove_files_in_folder(folder_path, current_year):
    """
    Remove the NetCDF files of a given year from a folder.

    Only regular files directly inside ``folder_path`` whose *name* contains
    ``current_year`` and ends with ``.nc`` are deleted; everything else
    (other years, other extensions, sub-directories) is left untouched.

    Parameters:
        folder_path (str): Path to the folder to remove files from.
        current_year (int or str): Year that must appear in the file name.
    """
    year_token = str(current_year)

    for file in os.listdir(folder_path):
        # Match on the file name only. Matching on the joined path would
        # delete every .nc file whenever the folder path itself happens to
        # contain the year (e.g. ".../run_2024/tmmx_2023.nc").
        if year_token not in file or not file.endswith(".nc"):
            continue

        file_path = os.path.join(folder_path, file)
        if os.path.isfile(file_path):
            os.remove(file_path)
            print(f"Deleted file: {file_path}")
Visualization helps in understanding spatial patterns and distributions in the data, making it easier to interpret and analyze.\n", - "- **Data Communication:** By saving the plot as an image, the function allows the results to be easily shared, included in reports, or further analyzed.\n", + "- `url`: A string representing the URL from which the file is to be downloaded.\n", + " - `target_file_path`: A string representing the path where the downloaded file should be saved.\n", "\n", - "We convert GridMET NetCDF files to CSV format for a specified date. We iterate through files in the GridMET folder, checking for files corresponding to the selected date. For each matching file, we extract the variable name and generate a CSV file containing the data. If the CSV file already exists, we skip the process. This process facilitates easy access and analysis of meteorological data for a specific date." + "- `with urllib.request.urlopen(url) as response:` opens a connection to the provided URL.\n", + " - `file_content = response.read()` reads the contents of the file from the URL." 
def download_file(url, target_file_path, variable):
    """
    Download a file from a URL and save it to a specified location.

    The payload is streamed to ``<target_file_path>.part`` and renamed to
    ``target_file_path`` only after the transfer completed, so a failed
    download never leaves a truncated file at the final path. Errors are
    reported on stdout and not raised (best-effort download).

    Parameters:
        url (str): URL of the file to download.
        target_file_path (str): Path where the downloaded file should be saved.
        variable (str): Name of the meteorological variable being downloaded.
            Not used by the download itself; kept for the callers' signature.
    """
    # Local import: only this function needs it.
    import shutil

    partial_path = f"{target_file_path}.part"
    try:
        with urllib.request.urlopen(url) as response:
            print(f"Downloading {url}")
            with open(partial_path, 'wb') as file:
                # Copy in chunks instead of holding the whole file in memory.
                shutil.copyfileobj(response, file)
        # Publish the finished file in a single rename.
        os.replace(partial_path, target_file_path)
        print(f"File downloaded successfully and saved as: {os.path.basename(target_file_path)}")
    except Exception as e:
        # Deliberately broad: one failed download is reported, not raised.
        print(f"An error occurred while downloading the file: {str(e)}")
        # Drop a half-written temporary file, if any.
        if os.path.exists(partial_path):
            os.remove(partial_path)
Selection:** The `year_list` is essential for determining which years' data should be processed, ensuring that the analysis covers the appropriate time span.\n", + "- `year_list`: A list of years for which the meteorological data is to be downloaded.\n", + "- `base_metadata_url` is a string that stores the base URL from where the meteorological data files will be downloaded.\n", + "- `variables_list` is a list containing the short names of the meteorological variables to be downloaded, such as `tmmn` (minimum temperature), `tmmx` (maximum temperature), `pr` (precipitation), `vpd` (vapor pressure deficit), `etr` (evapotranspiration), `rmax` (maximum relative humidity), `rmin` (minimum relative humidity), `vs` (wind speed).\n", "\n", - "We plot GridMET meteorological data for a specific variable and date. We read the data from a corresponding CSV file and preprocess it, ensuring valid numerical values. Then, we create a scatter plot, mapping the variable values to geographic coordinates. The color of each point on the plot represents the magnitude of the variable value. Finally, we save the plot as a PNG image for further analysis and visualization." + "- The function loops through each variable in `variables_list`.\n", + "- For each variable, it further loops through each year in `year_list`." 
def download_gridmet_of_specific_variables(year_list):
    """
    Download specific meteorological variables from the GridMET climatology dataset.

    For every selected variable and every year in ``year_list`` the file
    ``<var>_<year>.nc`` is fetched into ``gridmet_folder_name`` unless a file
    with that name already exists there.

    Parameters:
        year_list (iterable): Years for which the data is to be downloaded.
    """
    # Make a directory to store the downloaded files. Without it every write
    # fails and the error is only printed by download_file.
    os.makedirs(gridmet_folder_name, exist_ok=True)

    base_metadata_url = "http://www.northwestknowledge.net/metdata/data/"
    variables_list = ['tmmn', 'tmmx', 'pr', 'vpd', 'etr', 'rmax', 'rmin', 'vs']

    # NOTE(review): only the first three variables (tmmn, tmmx, pr) are
    # downloaded; presumably intentional to keep the run small - confirm.
    for var in variables_list[:3]:
        for y in year_list:
            nc_name = f"{var}_{y}.nc"
            download_link = base_metadata_url + nc_name
            target_file_path = os.path.join(gridmet_folder_name, nc_name)
            if not os.path.exists(target_file_path):
                download_file(download_link, target_file_path, var)
            else:
                print(f"File {os.path.basename(target_file_path)} exists")
def get_file_name_from_path(file_path):
    """
    Return the last component of a file path.

    Parameters:
        file_path (str): Full path to a file.

    Returns:
        str: The file name, i.e. everything after the final path separator.
    """
    return os.path.basename(file_path)
The results are saved in new CSV files.\n", + "## 3.2.1.8 Extract Variable Name from File Name\n", "\n", - "- `target_date`: The target date for generating cumulative history CSVs.\n", - "- `selected_date`: The `datetime` object representing the `target_date`.\n", - "- `past_october_1`: A `datetime` object representing October 1st of the current or previous year.\n", - "- `generated_csvs`: A list of paths to the CSV files generated for the specific date.\n", - "- `cumulative_target_path`: The file path where the cumulative CSV will be saved.\n", - "- `date_keyed_objects`: A dictionary holding CSV file paths keyed by date.\n", - "- `force`: A boolean flag indicating whether to force regeneration of cumulative CSVs.\n", + "The code snippet extracts the variable name from a given file name, assuming the file name follows a specific format.\n", "\n", - "- **Cumulative Data Analysis:** The function enables the analysis of cumulative meteorological data, such as total precipitation over a period, which is crucial for understanding long-term trends and impacts.\n", - "- **Automated Data Processing:** By automating the download, processing, and cumulative calculation steps, the function ensures that the data is prepared consistently and efficiently, reducing manual workload.\n" + "- `file_name`: A string representing the name of the file from which the variable name will be extracted.\n", + "- `var_name = str(file_name.split('_')[0])` splits the file name at the underscore and takes the first part (index `0`), which is expected to be the variable name. The `str()` function ensures that `var_name` is treated as a string." 
] }, { "cell_type": "code", - "execution_count": 79, - "id": "ef804f40", + "execution_count": 8, + "id": "6b2f8595", "metadata": {}, - "outputs": [], - "source": [ - "def prepare_cumulative_history_csvs(target_date=test_start_date, force=False):\n", - " \"\"\"\n", - " Prepare cumulative history CSVs for a specified target date.\n", - "\n", - " Parameters:\n", - " - target_date (str, optional): The target date in the format 'YYYY-MM-DD'. Default is 'test_start_date'.\n", - " - force (bool, optional): If True, forcefully regenerate cumulative CSVs even if they already exist. Default is False.\n", - "\n", - " Returns:\n", - " None\n", - "\n", - " This function generates cumulative history CSVs for a specified target date. It traverses the date range from the past\n", - " October 1 to the target date, downloads gridmet data, converts it to CSV, and merges it into a big DataFrame.\n", - " The cumulative values are calculated and saved in new CSV files.\n", - "\n", - " Example:\n", - " ```python\n", - " prepare_cumulative_history_csvs(target_date='2023-01-01', force=True)\n", - " ```\n", - "\n", - " Note: This function assumes the existence of the following helper functions:\n", - " - download_gridmet_of_specific_variables\n", - " - prepare_folder_and_get_year_list\n", - " - turn_gridmet_nc_to_csv\n", - " - add_cumulative_column\n", - " - process_group_value_filling\n", - " ```\n", - "\n", - " selected_date = datetime.strptime(target_date, \"%Y-%m-%d\")\n", - " print(selected_date)\n", - " if selected_date.month < 10:\n", - " past_october_1 = datetime(selected_date.year - 1, 10, 1)\n", - " else:\n", - " past_october_1 = datetime(selected_date.year, 10, 1)\n", - "\n", - " # Rest of the function logic...\n", - "\n", - " filled_data = filled_data.loc[:, ['Latitude', 'Longitude', var_name, f'cumulative_{var_name}']]\n", - " print(\"new_df final shape: \", filled_data.head())\n", - " filled_data.to_csv(cumulative_target_path, index=False)\n", - " print(f\"new df is saved to 
def get_var_from_file_name(file_name):
    """
    Return the variable prefix of a file name.

    The prefix is everything before the first underscore, e.g. ``"tmmx"`` for
    ``"tmmx_2024.nc"``. A name without an underscore is returned unchanged.

    Parameters:
        file_name (str): Name of the file.

    Returns:
        str: The part of ``file_name`` before the first ``_``.
    """
    prefix, _separator, _remainder = file_name.partition('_')
    return str(prefix)
CSV file and returns them as a list of tuples.\n", "\n", - " # Read the CSV into a dataframe and store it in the dictionary\n", - " date_keyed_objects[current_date_str] = generated_csvs\n", + "- `coordinates`: A list initialized as empty and used to store tuples of longitude and latitude values extracted from the CSV file.\n", + "- `df`: A DataFrame created by loading the CSV file using `pandas`. It contains the data from the CSV, including the `Latitude` and `Longitude` columns.\n", + "- `lon`: Represents the longitude extracted from the current row of the DataFrame.\n", + "- `lat`: Represents the latitude extracted from the current row of the DataFrame.\n", + "- Here we iterate over each row in the DataFrame using a `for` loop: `for index, row in df.iterrows():`.\n", + "- For each row, the function extracts the `Latitude` and `Longitude` values, converting them to floating-point numbers: `lon, lat = float(row[\"Latitude\"]), float(row[\"Longitude\"])`.\n", + "- These coordinates are appended to the `coordinates` list as a tuple: `coordinates.append((lon, lat))`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7c636ed1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_coordinates_of_template_tif():\n", + " \t# Load the CSV file and extract coordinates\n", + " coordinates = []\n", + " df = pd.read_csv(dem_csv)\n", + " for index, row in df.iterrows():\n", + " # Process each row here\n", + " lon, lat = float(row[\"Latitude\"]), float(row[\"Longitude\"])\n", + " coordinates.append((lon, lat))\n", + " return coordinates" + ] + }, + { + "cell_type": "markdown", + "id": "6b3f9072", + "metadata": {}, + "source": [ + "## 3.2.1.10 Find the Nearest Index in an Array\n", "\n", - " print(\"Processing complete for the date:\", current_date_str)\n", - " target_date = '2024-08-11'\n", - " target_generated_csvs = date_keyed_objects[target_date]\n", - " for index, single_csv in enumerate(target_generated_csvs):\n", - " # traverse the variables of gridmet here\n", - " # each variable is a loop\n", - " print(f\"creating cumulative for {single_csv}\")\n", - " \n", - " cumulative_target_path = f\"{single_csv}_cumulative.csv\"\n", - " print(\"cumulative_target_path = \", cumulative_target_path)\n", - " \n", - " if os.path.exists(cumulative_target_path) and not force:\n", - " print(f\"{cumulative_target_path} already exists, skipping..\")\n", - " continue\n", - " \n", - " # Extract the file name without extension\n", - " file_name = os.path.splitext(os.path.basename(single_csv))[0]\n", - " gap_filled_csv = f\"{cumulative_target_path}_gap_filled.csv\"\n", + "- `array`: A `numpy` array of numerical values from which the closest element to `value` is to be found.\n", + "- `value`: A numerical value for which the closest corresponding element in `array` is sought.\n", "\n", - "\t# Split the file name using underscores\n", - " var_name = file_name.split('_')[1]\n", - " print(f\"Found variable name {var_name}\")\n", - " current_date = past_october_1\n", - " new_df = pd.read_csv(single_csv)\n", - " 
def find_nearest_index(array, value):
    """
    Return the index of the array element closest to ``value``.

    Parameters:
        array: Array supporting element-wise subtraction and ``argmin()``
            (e.g. a numpy array).
        value: Number to look up.

    Returns:
        Index of the element with the smallest absolute difference to
        ``value``, as returned by ``argmin()``.
    """
    distances = abs(array - value)
    return distances.argmin()
place.\n", - "- `gridmet_folder_name`: Specifies the folder within the working directory where the GridMET climatology data will be stored.\n", - "- `western_us_coords`: Points to a CSV file containing the coordinates for the western U.S., derived from a DEM file.\n" + "- `current_date`: A `datetime` object representing the current date and time.\n", + "- `three_days_ago`: A `datetime` object representing the date three days before the current date.\n", + "- `three_days_ago_string`: A string representing the date three days ago, formatted as \"YYYY-MM-DD\".\n", + "- `test_start_date`: A string that stores the returned value from `get_operation_day()`, representing the operation day used in other parts of the code." ] }, { "cell_type": "code", - "execution_count": 80, - "id": "d202cb61", + "execution_count": 11, + "id": "29df8904", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The existing netcdf already covers the selected date. Avoid downloading..\n", - "File tmmn_2024.nc exists\n", - "File tmmn_2023.nc exists\n", - "File tmmx_2024.nc exists\n", - "File tmmx_2023.nc exists\n", - "File pr_2024.nc exists\n", - "File pr_2023.nc exists\n", - "Checking file: tmmn_2024.nc\n", - "File 2024_tmmn_2024-08-13.csv exists\n", - "Checking file: tmmx_2024.nc\n", - "File 2024_tmmx_2024-08-13.csv exists\n", - "Checking file: pr_2024.nc\n", - "File 2024_pr_2024-08-13.csv exists\n", - "test image is saved at ../data/gridmet_test_run/testing_output/2024_pr_2024-08-13.png\n", - "2024-08-13 00:00:00\n", - "The existing netcdf already covers the selected date. 
Avoid downloading..\n", - "File tmmn_2024.nc exists\n", - "File tmmn_2023.nc exists\n", - "File tmmx_2024.nc exists\n", - "File tmmx_2023.nc exists\n", - "File pr_2024.nc exists\n", - "File pr_2023.nc exists\n", - "Processing date: 2024-08-11\n", - "Checking file: tmmn_2024.nc\n", - "File 2024_tmmn_2024-08-11.csv exists\n", - "Checking file: tmmx_2024.nc\n", - "File 2024_tmmx_2024-08-11.csv exists\n", - "Checking file: pr_2024.nc\n", - "File 2024_pr_2024-08-11.csv exists\n", - "Processing complete for the date: 2024-08-11\n", - "creating cumulative for ../data/gridmet_test_run/testing_output/2024_tmmn_2024-08-11.csv\n", - "cumulative_target_path = ../data/gridmet_test_run/testing_output/2024_tmmn_2024-08-11.csv_cumulative.csv\n", - "Found variable name tmmn\n", - " Latitude Longitude tmmn\n", - "0 49.0 -125.000 --\n", - "1 49.0 -124.964 --\n", - "2 49.0 -124.928 --\n", - "3 49.0 -124.892 --\n", - "4 49.0 -124.856 --\n", - "Finished correctly Latitude Longitude tmmn date\n", - "0 49.0 -125.000 0.0 2024-08-11\n", - "1 49.0 -124.964 0.0 2024-08-11\n", - "2 49.0 -124.928 0.0 2024-08-11\n", - "3 49.0 -124.892 0.0 2024-08-11\n", - "4 49.0 -124.856 0.0 2024-08-11\n", - "(462204, 3)\n", - "new df is saved to ../data/gridmet_test_run/testing_output/2024_tmmn_2024-08-11.csv_cumulative.csv\n", - " Latitude Longitude tmmn\n", - "count 462204.000000 462204.00000 462204.000000\n", - "mean 37.030000 -112.52600 193.127659\n", - "std 6.921275 7.21226 135.027899\n", - "min 25.060000 -125.00000 0.000000\n", - "25% 31.036000 -118.77200 0.000000\n", - "50% 37.030000 -112.52600 283.600000\n", - "75% 43.024000 -106.28000 288.100000\n", - "max 49.000000 -100.05200 310.900000\n", - "creating cumulative for ../data/gridmet_test_run/testing_output/2024_tmmx_2024-08-11.csv\n", - "cumulative_target_path = ../data/gridmet_test_run/testing_output/2024_tmmx_2024-08-11.csv_cumulative.csv\n", - "Found variable name tmmx\n", - " Latitude Longitude tmmx\n", - "0 49.0 -125.000 --\n", - "1 49.0 
-124.964 --\n", - "2 49.0 -124.928 --\n", - "3 49.0 -124.892 --\n", - "4 49.0 -124.856 --\n", - "Finished correctly Latitude Longitude tmmx date\n", - "0 49.0 -125.000 0.0 2024-08-11\n", - "1 49.0 -124.964 0.0 2024-08-11\n", - "2 49.0 -124.928 0.0 2024-08-11\n", - "3 49.0 -124.892 0.0 2024-08-11\n", - "4 49.0 -124.856 0.0 2024-08-11\n", - "(462204, 3)\n", - "new df is saved to ../data/gridmet_test_run/testing_output/2024_tmmx_2024-08-11.csv_cumulative.csv\n", - " Latitude Longitude tmmx\n", - "count 462204.000000 462204.00000 462204.000000\n", - "mean 37.030000 -112.52600 203.360382\n", - "std 6.921275 7.21226 142.170743\n", - "min 25.060000 -125.00000 0.000000\n", - "25% 31.036000 -118.77200 0.000000\n", - "50% 37.030000 -112.52600 298.800000\n", - "75% 43.024000 -106.28000 304.500000\n", - "max 49.000000 -100.05200 322.200000\n", - "creating cumulative for ../data/gridmet_test_run/testing_output/2024_pr_2024-08-11.csv\n", - "cumulative_target_path = ../data/gridmet_test_run/testing_output/2024_pr_2024-08-11.csv_cumulative.csv\n", - "Found variable name pr\n", - " Latitude Longitude pr\n", - "0 49.0 -125.000 --\n", - "1 49.0 -124.964 --\n", - "2 49.0 -124.928 --\n", - "3 49.0 -124.892 --\n", - "4 49.0 -124.856 --\n", - "Finished correctly Latitude Longitude pr date\n", - "0 49.0 -125.000 0.0 2024-08-11\n", - "1 49.0 -124.964 0.0 2024-08-11\n", - "2 49.0 -124.928 0.0 2024-08-11\n", - "3 49.0 -124.892 0.0 2024-08-11\n", - "4 49.0 -124.856 0.0 2024-08-11\n", - "(462204, 3)\n", - "new df is saved to ../data/gridmet_test_run/testing_output/2024_pr_2024-08-11.csv_cumulative.csv\n", - " Latitude Longitude pr\n", - "count 462204.000000 462204.00000 462204.000000\n", - "mean 37.030000 -112.52600 0.883495\n", - "std 6.921275 7.21226 2.777075\n", - "min 25.060000 -125.00000 0.000000\n", - "25% 31.036000 -118.77200 0.000000\n", - "50% 37.030000 -112.52600 0.000000\n", - "75% 43.024000 -106.28000 0.000000\n", - "max 49.000000 -100.05200 45.400000\n" + "2024-08-16\n" ] - }, - { 
- "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ - "gridmet_folder_name = \"../data/gridmet_test_run/gridmet_climatology\"\n", - "western_us_coords = \"../data/dem_file.tif.csv\"\n", - "\n", - "# Run the download function\n", - "download_gridmet_of_specific_variables(prepare_folder_and_get_year_list())\n", - "turn_gridmet_nc_to_csv()\n", - "plot_gridmet()\n", + "from datetime import datetime, timedelta\n", + "def get_operation_day():\n", + " # Get the current date and time\n", + " current_date = datetime.now()\n", "\n", - "# prepare testing data with cumulative variables\n", - "prepare_cumulative_history_csvs(force=True)" - ] - }, - { - "cell_type": "markdown", - "id": "68f2bea0", - "metadata": {}, - "source": [ - "## 3.2.2 Merging Meteorological Data for Comprehensive Analysis\n", + " # Calculate three days ago\n", + " three_days_ago = current_date - timedelta(days=3)\n", "\n", - "In this chapter, we do\n", - "- `Data Collection:` We fetch gridMET climatology data for various meteorological variables (e.g., temperature, precipitation) and multiple years. \n", + " # Format the date as a string\n", + " three_days_ago_string = three_days_ago.strftime(\"%Y-%m-%d\")\n", "\n", - "- `Data Processing:` After downloading, we extract relevant data for specific geographical locations corresponding to weather stations.\n", + " print(three_days_ago_string)\n", "\n", - "- `Data Integration:` We merge similar variables obtained from different years into separate CSV files. We then combine all variables together into a single comprehensive dataset for further analysis and modeling tasks." 
+ " return three_days_ago_string\n", + "\n", + "test_start_date = get_operation_day()" ] }, { "cell_type": "markdown", - "id": "751d6689", + "id": "ea2c5e26", "metadata": {}, "source": [ + "## Utility Functions for Merging Meteorological Data for Comprehensive Analysis\n", + "\n", "## 3.2.2.1 Importing Libraries\n", "\n", - "The code snippet sets up the environment and defines the necessary paths and timeframes for a data processing task" + "The code snippet sets up the environment and defines the necessary paths and timeframes for a data processing task\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 1, - "id": "1a5713a0", + "execution_count": 12, + "id": "c5b46678", "metadata": {}, "outputs": [], "source": [ @@ -1176,7 +1441,7 @@ }, { "cell_type": "markdown", - "id": "3f8fcaa7", + "id": "c149efa8", "metadata": {}, "source": [ "## 3.2.2.2 Get Files from a Directory\n", @@ -1185,8 +1450,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "eb0bc340", + "execution_count": 13, + "id": "87850a0c", "metadata": {}, "outputs": [], "source": [ @@ -1199,7 +1464,7 @@ }, { "cell_type": "markdown", - "id": "bef84df2", + "id": "df8e974a", "metadata": {}, "source": [ "## 3.2.2.3 Download File from a URL\n", @@ -1209,8 +1474,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "e1ad571c", + "execution_count": 14, + "id": "485cb98e", "metadata": {}, "outputs": [], "source": [ @@ -1230,7 +1495,7 @@ }, { "cell_type": "markdown", - "id": "87896c19", + "id": "9d8ade90", "metadata": {}, "source": [ "## 3.2.2.4 Download GridMET Climatology Data\n", @@ -1246,8 +1511,8 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "45bab2c5", + "execution_count": 15, + "id": "f3a26326", "metadata": {}, "outputs": [], "source": [ @@ -1266,250 +1531,6 @@ " if not os.path.exists(os.path.join(folder_name, var + '_' + '%s' % y + '.nc')):\n", " download_file(download_link, folder_name)" ] - }, - { - "cell_type": "markdown", - "id": "66524b4e", - "metadata": {}, - 
"source": [ - "## 3.2.2.5 Extracting and Saving Data from a NetCDF File\n", - "\n", - "Here we read data from a NetCDF file, extracts specific variables, and saves the data as a CSV file.\n", - "\n", - "- `ds = xr.open_dataset(file_name)`: Opens the NetCDF file for reading.\n", - "- `var_to_extract = list(ds.keys())`: Extracts the variable names present in the dataset.\n", - "- `var_name = var_to_extract[0]`: Selects the first variable from the list for further processing.\n", - "\n", - "For each station (defined in `stations`), we extract the latitude (`lat`) and longitude (`lon`).\n", - "Using `ds.sel(lat=lat, lon=lon, method='nearest')`, we select the data nearest to the specified latitude and longitude and then convert the subset data into a DataFrame." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "3b4e75e6", - "metadata": {}, - "outputs": [], - "source": [ - "def get_gridmet_variable(file_name):\n", - " print(f\"reading values from {file_name}\")\n", - " result_data = []\n", - " ds = xr.open_dataset(file_name)\n", - " var_to_extract = list(ds.keys())\n", - " print(var_to_extract)\n", - " var_name = var_to_extract[0]\n", - " \n", - " df = pd.DataFrame(columns=['day', 'lat', 'lon', var_name])\n", - " \n", - " csv_file = f'{gridmet_save_location}/{Path(file_name).stem}.csv'\n", - " if os.path.exists(csv_file):\n", - " \tprint(f\"The file '{csv_file}' exists.\")\n", - " \treturn\n", - "\n", - " for idx, row in stations.iterrows():\n", - " lat = row['latitude']\n", - " lon = row['longitude']\n", - "\t\t\n", - " subset_data = ds.sel(lat=lat, lon=lon, method='nearest')\n", - " subset_data['lat'] = lat\n", - " subset_data['lon'] = lon\n", - " converted_df = subset_data.to_dataframe()\n", - " converted_df = converted_df.reset_index(drop=False)\n", - " converted_df = converted_df.drop('crs', axis=1)\n", - " df = pd.concat([df, converted_df], ignore_index=True)\n", - " \n", - " result_df = df\n", - " print(\"got result_df : \", 
result_df.head())\n", - " result_df.to_csv(csv_file, index=False)\n", - " print(f'completed extracting data for {file_name}')" - ] - }, - { - "cell_type": "markdown", - "id": "a20501ee", - "metadata": {}, - "source": [ - "## 3.2.2.6 Merge Similar Variables from Different Years\n", - "\n", - "Here we merge CSV files containing similar variables but from different years. The merged data is saved as a single CSV file for each variable.\n", - "\n", - "- `file_groups = {}`: Initializes an empty dictionary to store grouped files.\n", - "- For each file, we extract the base name (variable name) and year, then groups files by the variable name if they end with `.csv`.\n", - "- And then Files are grouped based on the part of the filename before the year, ensuring that all files related to the same variable are grouped together.\n", - "\n", - "- Here we start by listing all the files in the specified `gridmet_save_location` directory.\n", - "- And then we group the files by their base names (e.g., `temperature`, `precipitation`) by splitting the filename at the underscore (`_`) and checking if the file is a CSV file with a valid year.\n", - "- For each group of files (i.e., files with the same variable but from different years), the function reads the first 5 files into pandas DataFrames and merges them into a single DataFrame.\n", - "- The merged DataFrame is saved as a new CSV file with the base name followed by `_merged.csv`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5798ad94", - "metadata": {}, - "outputs": [], - "source": [ - "def merge_similar_variables_from_different_years():\n", - " files = os.listdir(gridmet_save_location)\n", - " file_groups = {}\n", - "\n", - " for filename in files:\n", - " base_name, year_ext = os.path.splitext(filename)\n", - " parts = base_name.split('_')\n", - " if len(parts) == 2 and year_ext == '.csv':\n", - " file_groups.setdefault(parts[0], []).append(filename)\n", - "\n", - " for base_name, file_list in file_groups.items():\n", - " if len(file_list) > 1:\n", - " dfs = []\n", - " for filename in file_list[:5]:\n", - " df = pd.read_csv(os.path.join(gridmet_save_location, filename))\n", - " dfs.append(df)\n", - " merged_df = pd.concat(dfs, ignore_index=True)\n", - " merged_filename = f\"{base_name}_merged.csv\"\n", - " merged_df.to_csv(os.path.join(gridmet_save_location, merged_filename), index=False)\n", - " print(f\"Merged {file_list} into {merged_filename}\")" - ] - }, - { - "cell_type": "markdown", - "id": "bc7f03a0", - "metadata": {}, - "source": [ - "## 3.2.2.7 Merge All Variables Together\n", - "\n", - "Paths for specific variables (`rmin`, `rmax`, `tmmn`, `tmmx`) are explicitly defined, and their respective CSV files are loaded into DataFrames.\n", - "\n", - "The columns in the specific DataFrames are renamed to distinguish between similar variable names (e.g., `relative_humidity` for `rmin` and `rmax`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "35fff41c", - "metadata": {}, - "outputs": [], - "source": [ - "def merge_all_variables_together():\n", - " merged_df = None\n", - " file_paths = []\n", - "\n", - " for filename in os.listdir(gridmet_save_location)[1:3]:\n", - " if filename.endswith(\"_merged.csv\"):\n", - " file_paths.append(os.path.join(gridmet_save_location, filename))\n", - "\t\n", - " rmin_merged_path = os.path.join(gridmet_save_location, 'rmin_merged.csv')\n", - " rmax_merged_path = os.path.join(gridmet_save_location, 'rmax_merged.csv')\n", - " tmmn_merged_path = os.path.join(gridmet_save_location, 'tmmn_merged.csv')\n", - " tmmx_merged_path = os.path.join(gridmet_save_location, 'tmmx_merged.csv')\n", - " \n", - " df_rmin = pd.read_csv(rmin_merged_path)\n", - " df_rmax = pd.read_csv(rmax_merged_path , engine='python')\n", - " df_tmmn = pd.read_csv(tmmn_merged_path)\n", - " df_tmmx = pd.read_csv(tmmx_merged_path)\n", - " \n", - " df_rmin.rename(columns={'relative_humidity': 'relative_humidity_rmin'}, inplace=True)\n", - " df_rmax.rename(columns={'relative_humidity': 'relative_humidity_rmax'}, inplace=True)\n", - " df_tmmn.rename(columns={'air_temperature': 'air_temperature_tmmn'}, inplace=True)\n", - " df_tmmx.rename(columns={'air_temperature': 'air_temperature_tmmx'}, inplace=True)\n", - " \n", - " df_rmin.to_csv(os.path.join(gridmet_save_location, 'rmin_merged.csv'))\n", - " df_rmax.to_csv(os.path.join(gridmet_save_location, 'rmax_merged.csv'))\n", - " df_tmmn.to_csv(os.path.join(gridmet_save_location, 'tmmn_merged.csv'))\n", - " df_tmmx.to_csv(os.path.join(gridmet_save_location, 'tmmx_merged.csv'))\n", - " \n", - " if file_paths:\n", - " merged_df = pd.read_csv(file_paths[0])\n", - " for file_path in file_paths[1:3]:\n", - " df = pd.read_csv(file_path)\n", - " merged_df = pd.concat([merged_df, df], axis=1)\n", - " merged_df = merged_df.loc[:, ~merged_df.columns.duplicated()]\n", - " merged_df.to_csv(final_merged_csv, 
index=False)\n" - ] - }, - { - "cell_type": "markdown", - "id": "999d5177", - "metadata": {}, - "source": [ - "## 3.2.2.8 Workflow for Processing GridMET Climatology Data\n", - "\n", - "- The `download_gridmet_climatology()` function is called to download the necessary climatology data files for various variables.\n", - "- `nc_files = get_files_in_directory()`: This retrieves a list of all NetCDF files downloaded in the previous step.\n", - "- A loop iterates over the first five NetCDF files in the list (`nc_files`), printing each file's name and extracting data using the `get_gridmet_variable(nc)` function.\n", - "- `merge_similar_variables_from_different_years()`: This function is called to merge data files containing the same variable but from different years into a single file for each variable.\n", - "- `merge_all_variables_together()`: Finally, this function combines the merged data files for different variables into one comprehensive dataset, allowing for integrated analysis." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "33d38173", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "downloading http://www.northwestknowledge.net/metdata/data/tmmn_2021.nc\n", - "downloading http://www.northwestknowledge.net/metdata/data/tmmx_2021.nc\n", - "downloading http://www.northwestknowledge.net/metdata/data/pr_2021.nc\n", - "downloading http://www.northwestknowledge.net/metdata/data/vpd_2021.nc\n", - "downloading http://www.northwestknowledge.net/metdata/data/etr_2021.nc\n", - "downloading http://www.northwestknowledge.net/metdata/data/rmax_2021.nc\n", - "downloading http://www.northwestknowledge.net/metdata/data/rmin_2021.nc\n", - "downloading http://www.northwestknowledge.net/metdata/data/vs_2021.nc\n", - "reading values from ../data/gridmet_test_run/gridmet_climatology/rmin_2021.nc\n", - "['relative_humidity']\n", - "The file '../data/gridmet_test_run/gridmet_climatology/rmin_2021.csv' exists.\n", 
- "reading values from ../data/gridmet_test_run/gridmet_climatology/tmmn_2021.nc\n", - "['air_temperature']\n", - "The file '../data/gridmet_test_run/gridmet_climatology/tmmn_2021.csv' exists.\n", - "reading values from ../data/gridmet_test_run/gridmet_climatology/etr_2021.nc\n", - "['potential_evapotranspiration']\n", - "The file '../data/gridmet_test_run/gridmet_climatology/etr_2021.csv' exists.\n", - "reading values from ../data/gridmet_test_run/gridmet_climatology/etr_2020.nc\n", - "['potential_evapotranspiration']\n", - "The file '../data/gridmet_test_run/gridmet_climatology/etr_2020.csv' exists.\n", - "reading values from ../data/gridmet_test_run/gridmet_climatology/rmin_2020.nc\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/homebrew/lib/python3.10/site-packages/xarray/backends/plugins.py:80: RuntimeWarning: Engine 'cfgrib' loading failed:\n", - "Cannot find the ecCodes library\n", - " warnings.warn(f\"Engine {name!r} loading failed:\\n{ex}\", RuntimeWarning)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['relative_humidity']\n", - "The file '../data/gridmet_test_run/gridmet_climatology/rmin_2020.csv' exists.\n", - "Merged ['tmmn_2020.csv', 'tmmn_2021.csv', 'tmmn_merged.csv'] into tmmn_merged.csv\n", - "Merged ['vs_merged.csv', 'vs_2020.csv', 'vs_2021.csv'] into vs_merged.csv\n", - "Merged ['rmax_merged.csv', 'rmax_2020.csv', 'rmax_2021.csv'] into rmax_merged.csv\n", - "Merged ['etr_merged.csv', 'etr_2020.csv', 'etr_2021.csv'] into etr_merged.csv\n", - "Merged ['vpd_merged.csv', 'vpd_2020.csv', 'vpd_2021.csv'] into vpd_merged.csv\n", - "Merged ['tmmx_2021.csv', 'tmmx_2020.csv', 'tmmx_merged.csv'] into tmmx_merged.csv\n", - "Merged ['pr_2021.csv', 'pr_2020.csv', 'pr_merged.csv'] into pr_merged.csv\n", - "Merged ['rmin_2021.csv', 'rmin_merged.csv', 'rmin_2020.csv'] into rmin_merged.csv\n" - ] - } - ], - "source": [ - "download_gridmet_climatology()\n", - "\n", - "nc_files = 
get_files_in_directory()\n", - "for nc in nc_files[:5]:\n", - " get_gridmet_variable(nc)\n", - "merge_similar_variables_from_different_years()\n", - "merge_all_variables_together()" - ] } ], "metadata": { From a0979e74b6b183988372ae0ca5a883b1438caad3 Mon Sep 17 00:00:00 2001 From: iammeghana Date: Mon, 19 Aug 2024 15:02:58 -0400 Subject: [PATCH 2/2] correction heading numbers --- book/chapters/gridmet.ipynb | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/book/chapters/gridmet.ipynb b/book/chapters/gridmet.ipynb index 9f9c318..3bb1811 100644 --- a/book/chapters/gridmet.ipynb +++ b/book/chapters/gridmet.ipynb @@ -953,7 +953,7 @@ "id": "fba2568a", "metadata": {}, "source": [ - "## 3.2.1.1 Setup and Variable Mapping\n", + "## 1. Setup and Variable Mapping\n", "\n", "The following code snippet sets up the environment by importing necessary libraries, defining a workspace, and mapping variables.\n", "\n", @@ -1014,7 +1014,7 @@ "id": "b80cd848", "metadata": {}, "source": [ - "## 3.2.1.2 Map Values to Colors\n", + "## 2. Map Values to Colors\n", "\n", "Here we generate a color mapping for a given column of data based on specified or automatically calculated value ranges. It returns the color mapping and the value ranges used.\n", "\n", @@ -1064,7 +1064,7 @@ "id": "8267ed30", "metadata": {}, "source": [ - "## 3.2.1.3 Retrive the Current Year\n", + "## 3. Retrive the Current Year\n", "\n", "The following code snippet retrives the current year from the system's date and time." ] @@ -1101,7 +1101,7 @@ "id": "6bb255e8", "metadata": {}, "source": [ - "## 3.2.1.4 Removes Specific Files in a Folder\n", + "## 4. Removes Specific Files in a Folder\n", "\n", "We remove all files within the specified folder.\n", "\n", @@ -1146,7 +1146,7 @@ "id": "7cc22618", "metadata": {}, "source": [ - "## 3.2.1.5 Download File from a URL\n", + "## 5. 
Download File from a URL\n", "\n", "Here we download a file from a given URL and saves it to a specified location.\n", "\n", @@ -1190,7 +1190,7 @@ "id": "f52923c4", "metadata": {}, "source": [ - "## 3.2.1.6 Downloads Specific Meteorological Variables\n", + "## 6. Downloads Specific Meteorological Variables\n", "\n", "Here we download specific meteorological variables from the GridMET climatology dataset for a list of years provided as input.\n", "\n", @@ -1233,7 +1233,7 @@ "id": "5c4e5956", "metadata": {}, "source": [ - "## 3.2.1.7 Extract File Name from File Path\n", + "## 7. Extract File Name from File Path\n", "\n", "Here we extracts the file name from a given file path.\n", "- `file_path`: A string representing the full path to a file.\n", @@ -1258,7 +1258,7 @@ "id": "d93fc723", "metadata": {}, "source": [ - "## 3.2.1.8 Extract Variable Name from File Name\n", + "## 8. Extract Variable Name from File Name\n", "\n", "The code snippet extracts the variable name from a given file name, assuming the file name follows a specific format.\n", "\n", @@ -1284,7 +1284,7 @@ "id": "dd3bee41", "metadata": {}, "source": [ - "## 3.2.1.9 Extracts Coordinates from a CSV File\n", + "## 9. Extracts Coordinates from a CSV File\n", "\n", "Here we extracts geographical coordinates (longitude and latitude) from a CSV file and returns them as a list of tuples.\n", "\n", @@ -1320,7 +1320,7 @@ "id": "6b3f9072", "metadata": {}, "source": [ - "## 3.2.1.10 Find the Nearest Index in an Array\n", + "## 10. Find the Nearest Index in an Array\n", "\n", "- `array`: A `numpy` array of numerical values from which the closest element to `value` is to be found.\n", "- `value`: A numerical value for which the closest corresponding element in `array` is sought.\n", @@ -1345,7 +1345,7 @@ "id": "524ca247", "metadata": {}, "source": [ - "## 3.2.1.12 Calculate specific Operation Day\n", + "## 11. 
Calculate specific Operation Day\n", "\n", "Here we calculate the date exactly three days before the current date and returns it as a formatted string.\n", "\n", @@ -1395,7 +1395,7 @@ "source": [ "## Utility Functions for Merging Meteorological Data for Comprehensive Analysis\n", "\n", - "## 3.2.2.1 Importing Libraries\n", + "## 1. Importing Libraries\n", "\n", "The code snippet sets up the environment and defines the necessary paths and timeframes for a data processing task\n", "\n" @@ -1444,7 +1444,7 @@ "id": "c149efa8", "metadata": {}, "source": [ - "## 3.2.2.2 Get Files from a Directory\n", + "## 2. Get Files from a Directory\n", "We collect the names of files with the extension \".nc\" within a specified directory by iterating through all files, appending their names to a list, and returning the list." ] }, @@ -1467,7 +1467,7 @@ "id": "df8e974a", "metadata": {}, "source": [ - "## 3.2.2.3 Download File from a URL\n", + "## 3. Download File from a URL\n", "\n", "Here we download a file from a given URL and saves it to a specified location on your system." ] @@ -1498,7 +1498,7 @@ "id": "9d8ade90", "metadata": {}, "source": [ - "## 3.2.2.4 Download GridMET Climatology Data\n", + "## 4. Download GridMET Climatology Data\n", "\n", "We attempt to download a file from a specified URL. We then save the downloaded file to a specified location.\n", "\n",