diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..4d3e559 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python CI + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + +jobs: + build: + runs-on: ubuntu-latest + container: ghcr.io/osgeo/gdal:ubuntu-small-3.9.3 + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + + steps: + - name: Install system + run: | + apt-get update -qqy + apt-get install -y git python3-pip libpq5 libpq-dev + - uses: actions/checkout@v4 + with: + submodules: 'true' + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + - name: Lint with pylint + run: | + python3 -m pylint deltap predictors prepare-layers prepare-species diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..30fd8a6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "aoh-calculator"] + path = aoh-calculator + url = git@github.com:quantifyearth/aoh-calculator.git diff --git a/.pylintrc b/.pylintrc index ac01bde..56e254c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,4 +1,4 @@ [FORMAT] max-line-length=120 -disable=C0114, C0115, C0116, W0511, R0801, R0902, R0912, R0913, R0914, R0915, R1705, W0231 \ No newline at end of file +disable=C3001, C0104, C0114, C0115, C0116, W0511, R0801, R0902, R0911, R0912, R0913, R0914, R0915, R0917, R1705, W0231, \ No newline at end of file diff --git a/IUCN-importer/README.md b/IUCN-importer/README.md deleted file mode 100644 index d257546..0000000 --- a/IUCN-importer/README.md +++ /dev/null @@ -1,27 +0,0 @@ - -## Overview - -This program to collates IUCN Red List data to help researchers calculate the likelihood of whether a species will become extinct or remain extant. It processes the habitats, elevation limits, and range polygons for entire classes of species and aggregates them into a single geo-package. This geo-package can be used to calculate and map the AOH for different species individually. Mapping the AOH of species is the first step in determining its persistence metric. - -### Key Instructions - -The data for the species you are interested in needs to be **downloaded, extracted and combined** - the IUCN only lets you download the csv files and the shape files separately, but they all need to be in the *same file*. - -That file name then becomes the argument that you pass in with the program when running in a cmd prompt. - -### Things to know - -* The program can take a while to run - it has to read in a shapefile each time, which is usually over 1GB in size for a class. - -* If there is no shapefile data for a species that is present in one of the csv files, that species won't appear in the final file. - -* The shapefile's column for season is coded into integers from 1-5: - 1. Resident - 2. Breeding - 3. Non-Breeding - 4. Passing - 5. Unknown - 4 and 5 are ignored and assumed to be 1 for the purposes of this program. 
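For reference, the season convention the removed IUCN-importer README describes (codes 4 and 5 folded into 1) can be expressed as a short sketch. This is an illustration of the documented convention only, not code taken from the repository:

```python
# Illustration only (not repository code): the season coding described in the
# removed IUCN-importer README. Codes 4 (Passing) and 5 (Unknown) are treated
# as 1 (Resident) for the purposes of the importer.
SEASON_NAMES = {1: "Resident", 2: "Breeding", 3: "Non-Breeding", 4: "Passing", 5: "Unknown"}

def normalise_season(code: int) -> int:
    """Collapse Passing, Unknown, or unrecognised codes onto Resident."""
    return code if code in (2, 3) else 1
```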
- - - diff --git a/IUCN-importer/iucn_gpkg_creator.py b/IUCN-importer/iucn_gpkg_creator.py deleted file mode 100644 index 42524cd..0000000 --- a/IUCN-importer/iucn_gpkg_creator.py +++ /dev/null @@ -1,174 +0,0 @@ -# The shapefile's column for season is coded into integers from 1-5: -# 1 = Resident, -# 2 = Breeding, -# 3 = Non-Breeding, -# 4 = Passing, -# 5 = Unknown -# We don't care about 4 and 5, so they just get turned into 1s. -# BUT we need these to be in text format, so we can merge the csv file with the shape file on this column (as well as -# internalTaxonId) HOWEVER in the case of Amphibians, although the csv file seems to indicate that some species have -# breeding grounds as well as residency, it is actually only the HABITAT not the LOCATION that changes for breeding: -# e.g. frogs will live in the forest, but breed in the ponds, but stay in the same geographical area. Therefore if -# every species has a 1, then I will merge the tables on internaltaxonId only, and copy the shapefiles for both -# breeding and non-breeding/resident. - -# Imports -import argparse -import glob -import json -import os.path -from typing import Tuple - -import pandas as pd -import geopandas as gpd -from shapely.geometry import Polygon, MultiPolygon - -PYDEVD_WARN_SLOW_RESOLVE_TIMEOUT = 10 - -# This function opens up and reads the files necessary to make the geopackage from the specified folder -def reading_files(folder: str) -> Tuple[pd.DataFrame, pd.DataFrame, gpd.GeoDataFrame]: - print("Reading csv files...") - habitats = pd.read_csv(os.path.join(folder, "habitats.csv")) - allotherfields = pd.read_csv(os.path.join(folder, "all_other_fields.csv")) - print("Reading shape file...") - shapename = ''.join(glob.glob(os.path.join(folder, '*.shp'))) - geo = gpd.read_file(shapename) - return habitats, allotherfields, geo - -# The shapefile's column for season is coded into integers from 1-5: -# 1 = Resident, 2 = Breeding, 3 = Non-Breeding, 4 = Passing, 5 = Unknown -# The habitats.csv column for season is the string version of the above coding. 
-# Any season that is passing, unknown, or simply doesn't have a value, is marked as resident -def changing_seasons(seasons: pd.Series) -> pd.Series: - if isinstance(seasons, str): - seasons.fillna("Resident", inplace = True) - season_array = [] - for season in seasons: - if ("NON-BREEDING" in str(season).upper()) or ('3' in str(season)): - season_array.append(3) - elif ("BREEDING" in str(season).upper()) or ('2' in str(season)): - season_array.append(2) - else: - season_array.append(1) - return pd.Series(season_array) - -#This function extracts, fills and replaces key pandas Series -#This function also creates temporary pandas DataFrames for manipulation -def extracting_series( - habitats: pd.DataFrame, - allotherfields: pd.DataFrame, - geo: gpd.GeoDataFrame -) -> Tuple[pd.DataFrame, pd.DataFrame, gpd.GeoDataFrame]: - print("Extracting key data...") - habitats['majorImportance'].fillna("Yes", inplace = True) - - temp_season = changing_seasons(habitats['season']) - habitats = habitats.drop(['season'], axis = 1) - habitats = habitats.assign(season = temp_season) - - temp = pd.DataFrame(data = pd.Series(allotherfields['internalTaxonId'])) - temp = temp.assign(ElevationLower = pd.Series(allotherfields['ElevationLower.limit']).fillna(-500.0), - ElevationUpper = pd.Series(allotherfields['ElevationUpper.limit']).fillna(9000.0)) - - notfinal = gpd.GeoDataFrame(data = geo['geometry']) - # We've seen both CAPS and noncaps column names, so here we just force everything to CAPS for consistency - geo = geo.rename(columns={ - 'id_no': 'ID_NO', - 'presence': 'PRESENCE', - 'origin': 'ORIGIN', - 'seasonal': 'SEASONAL', - }) - notfinal = notfinal.assign(internalTaxonId = pd.Series(geo['ID_NO']), Presence = pd.Series(geo['PRESENCE']), - Origin = pd.Series(geo['ORIGIN']), season = changing_seasons(pd.Series(geo['SEASONAL']))) - - return habitats, temp, notfinal - -# This function aggregates the data from the two csv files together -def habitats_sort(habitats: pd.DataFrame, temp: pd.DataFrame) -> pd.DataFrame: - print("Grouping files...") - habitats = habitats.groupby(['internalTaxonId', 'season']).agg({ - 'code': lambda x: json.dumps(x.tolist()), - 'majorImportance': lambda x: json.dumps(x.tolist()), - 'suitability': lambda x: json.dumps(x.tolist()) - }).reset_index() - habitats = habitats.merge(temp, how='left', on='internalTaxonId') - return habitats - -# This function takes the 'geometry' row and if the row has more than one polygon or -# multipolygon in it, combines them together to make a new multipolygon -def to_polygons(geometries): - for geometry in geometries: - if isinstance(geometry, Polygon): - yield geometry - elif isinstance(geometry, MultiPolygon): - yield from geometry.geoms - else: - raise TypeError(f"Unexpected type: {type(geometry)}") - -# This function aggregates the GeoDataFrame, and then merges it with the data from the csv files -def shape_sort(notfinal: gpd.GeoDataFrame, habitats: pd.DataFrame) -> gpd.GeoDataFrame: - #print(notfinal) #Debugging - print("Combining files...") - notfinal = (notfinal.groupby(['internalTaxonId', 'season']).agg({ - 'Presence': lambda x: json.dumps(x.tolist()), - 'Origin': lambda x: json.dumps(x.tolist()), - 'geometry': lambda x: MultiPolygon(to_polygons(x)) - })).reset_index() - notfinal = notfinal.merge(habitats, how='left', on=['internalTaxonId', 'season']) - return notfinal - -# This function converts the GeoDataFrame into a GeoPackage -def to_file(notfinal: gpd.GeoDataFrame, output_path: str) -> None: - print("Building file...") - # When the 
non-geometry related series are added, final becomes a DataFrame - - # so it has to be turned back into a GeoDataFrame - final = gpd.GeoDataFrame(notfinal, geometry = 'geometry') - final.to_file(output_path, driver = 'GPKG', index = None) - print(f"File {output_path} created") - -# This function records the IUCN Red List Taxonomy ID for the species that were -# not in the final geopackage -def unrecorded_data(habitats: pd.DataFrame, notfinal: gpd.GeoDataFrame): - taxid = pd.DataFrame(habitats['internalTaxonId']) - csv_set = set() - for _, row1 in taxid.iterrows(): - csv_set.add(row1['internalTaxonId']) - nopair = set() - inopair = 0 - for _, row2 in notfinal.iterrows(): - if row2['internalTaxonId'] in csv_set: - continue - nopair.add(row2['internalTaxonId']) - inopair = inopair+1 - if inopair > 0: - print("There was not enough data for: ") - for x in nopair: - print(x) - -def main(): - parser = argparse.ArgumentParser(description="Process IUCN raw data to gpkg file.") - parser.add_argument( - '--raw', - type=str, - help='Directory containing raw CSV and shape files from IUCN', - required=True, - dest='input_directory_path', - ) - parser.add_argument( - '--output', - type=str, - help='Name of result package file', - required=True, - dest='output_path', - ) - args = parser.parse_args() - - habitats, allotherfields, geo = reading_files(args.input_directory_path) - habitats, temp, notfinal = extracting_series(habitats, allotherfields, geo) - habitats = habitats_sort(habitats, temp) - notfinal = shape_sort(notfinal, habitats) - to_file(notfinal, args.output_path) - #unrecorded_data(habitats, notfinal) - -if __name__ == "__main__": - main() diff --git a/aoh-calculator b/aoh-calculator new file mode 160000 index 0000000..b6574cc --- /dev/null +++ b/aoh-calculator @@ -0,0 +1 @@ +Subproject commit b6574ccc027e3f81db011f793e334f3bbc0f2776 diff --git a/aohcalc.py b/aohcalc.py deleted file mode 100644 index 2c37cf4..0000000 --- a/aohcalc.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import json -import os -import sys - - -import cProfile -import pstats - -from iucn_modlib.factories import TaxonFactories - -import persistence - - -parser = argparse.ArgumentParser(description="Area of habitat calculator.") -parser.add_argument( - '--taxid', - type=int, - help="animal taxonomy id", - required=True, - dest="species" -) -parser.add_argument( - '--seasonality', - type=str, - help="which season to calculate for (breeding, nonbreeding, or resident)", - required=True, - dest="seasonality" -) -parser.add_argument( - '--experiment', - type=str, - help="name of experiment group from configuration json", - required=True, - dest="experiment" -) -parser.add_argument( - '--config', - type=str, - help="path of configuration json", - required=False, - dest="config_path", - default="config.json" -) -parser.add_argument( - '--geotiffs', - type=str, - help='directory where area geotiffs should be stored', - required=False, - dest='results_path', - default=None, -) -parser.add_argument( - '--nogpu', - type=str, - help='disable CUDA usage', - required=False, - dest='nogpu', - default='True', -) -parser.add_argument( - '--profile', - type=bool, - help='enable profiling', - required=False, - dest='profile', - default=False, -) -args = vars(parser.parse_args()) - -if args['nogpu'].lower() in ['t', 'true']: - persistence.USE_GPU = False - -try: - seasonality = persistence.Seasonality(args['seasonality']) -except ValueError: - print(f'Seasonality {args["seasonality"]} is not valid') - 
sys.exit(-1) - -try: - with open(args['config_path'], 'r', encoding='utf-8') as config_file: - config = json.load(config_file) -except FileNotFoundError: - print(f'Failed to find configuration json file {args["config_path"]}') - sys.exit(-1) -except json.decoder.JSONDecodeError as e: - print(f'Failed to parse {args["config_path"]} at line {e.lineno}, column {e.colno}: {e.msg}') - sys.exit(-1) - -try: - experiment = config['experiments'][args['experiment']] -except KeyError: - if not 'experiments' in config: - print("No experiments section founnd in configuration json") - else: - print(f'Failed to find experiment with name {args["experiment"]}. Options found:') - for experiment in config['experiments']: - print(f'\t{experiment}') - sys.exit(-1) - -if 'iucn_batch' in experiment: - batch = TaxonFactories.loadBatchSource(experiment['iucn_batch']) - species = TaxonFactories.TaxonFactoryRedListBatch(args['species'], batch) -else: - try: - species = TaxonFactories.TaxonFactoryRedListAPI(args['species'], config['iucn']['api_key']) - except KeyError: - print("Failed to find IUCN API key in config file or batch path in experiment.") - sys.exit(-1) - -try: - translator = experiment['translator'] - if translator == 'jung': - TranslatorType = persistence.JungModel - elif translator == 'esacci': - TranslatorType = persistence.ESACCIModel - else: - print(f'Translator type of "{translator}" not recognised. Expected "jung" or "esacci".') - sys.exit(-1) -except KeyError: - print(f'Experiment {args["experiment"]} is missing a translator key') - sys.exit(-1) - -try: - land = TranslatorType( - experiment['habitat'], - experiment['elevation'], - experiment['area'] - ) -except KeyError: - print(f'Experiment "{args["experiment"]}" was missing one or more of the map keys: "habitat", "elevation", "area".') - sys.exit(-1) - -try: - range_path = experiment['range'] -except KeyError: - print(f'Experiment "{args["experiment"]}" was missing range key.') - -if args['results_path']: - if not os.path.isdir(args['results_path']): - print(f'Provided results path {args["results_path"]} is not a directory') - sys.exit(-1) - -# pylint: disable=C0103 -profiler = None -if args['profile']: - profiler = cProfile.Profile() - profiler.enable() - -try: - result = persistence.calculator(species, range_path, land, seasonality, args['results_path']) -except KeyboardInterrupt: - pass - -if profiler is not None: - profiler.disable() - p = pstats.Stats(profiler) - p.sort_stats(pstats.SortKey.TIME).print_stats(20) - -print(', '.join([str(x) for x in result])) diff --git a/deltap/delta_P_hectare.r b/deltap/delta_P_hectare.r deleted file mode 100644 index 8ed009e..0000000 --- a/deltap/delta_P_hectare.r +++ /dev/null @@ -1,111 +0,0 @@ -# Converts the cumulative deltaP rasters to values / hectare land use change -# NOT VERY EFFICIENT- THERE ARE PROBALY MUCH FASTER WAYS -rm(list = ls()) -# Dependancies -library(raster) -library(terra) -library(gdata) - -# Function to clean up the delta_P rasters and save them: -# - -# INPUTS: FOLDER TO RASTERS -# LUC RASTER -# OUTPUT: RASTER STACK -process_rasters <- function(data_dir, area_restore_path) { - # List raster files in the directory - rastlist <- list.files(path = data_dir, pattern = '.tif$', all.files = TRUE, full.names = TRUE) - # Read the area_restore raster - area_restore <- rast(area_restore_path) - # Filter area_restore - area_restore_filter <- ifel(area_restore < 1e4, 0, area_restore) - area_restore_filter <- area_restore_filter / 1e4 - - # Create a blank raster - blank_raster <- 
rast(area_restore_filter) - values(blank_raster) <- 0 - # Initialize an empty stack - stack <- rast() - for (i in 1:length(rastlist)) { - rast_i <- rast(rastlist[i]) - # Mosaic blank_raster and rast_i, and set names - rast_i_extent <- mosaic(blank_raster, rast_i, fun = "sum") - # Only keep values where LUC > hectare - rast_i_extent <- ifel(area_restore_filter != 0, rast_i_extent, 0) - rast_i_extent_ha <- rast_i_extent / area_restore_filter - # Set names - names(rast_i_extent_ha) <- names(rast_i) - # Append to the stack - stack <- append(stack, rast_i_extent_ha) - } - - return(stack) -} - -# WRITE THE 0.25 restore and arable rasters -results_stack<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/0.25","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(results_stack, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_0.25.tif") - -min(results_stack$amphibians) -min(results_stack$birds) - -keep(process_rasters,sure = TRUE) -results_stack <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/0.25","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -plot(-1*(log(results_stack$birds)) -# MIGHT BE A PROBLEM WITH THIS?> -writeRaster(results_stack, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_0.25.tif") -min(results_stack) -plot(results_stack$amphibians) - -## WRITE OTHERS IF I WANT - -# RESTORE -# 0.1 -restore_0.1<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/0.1","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_0.1, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_0.1.tif") -min(results_stack$amphibians) -min(results_stack$birds) - -keep(process_rasters,sure = TRUE) -# 0.5 -restore_0.5<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/0.5","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_0.5, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_0.5.tif") - - -keep(process_rasters,sure = TRUE) -#1.0 -restore_1<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/1.0","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_1, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_1.0.tif") -keep(process_rasters,sure = TRUE) -#gompertz -restore_gompertz<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/gompertz","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_gompertz, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_gompertz.tif") - - -# Conserve -# 0.1 -keep(process_rasters,sure = TRUE) -arable_0.1 <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/0.1","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -hist(log(arable_0.1$birds)) -writeRaster(arable_0.1, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_0.1.tif") - - -# 0.5 -keep(process_rasters,sure = TRUE) -arable_0.5 <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/0.5","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -writeRaster(arable_0.5, filename = 
"/maps/results/global_analysis/outputs_mwd_processed/arable/arable_0.5.tif") - - - - -# 1,0 -keep(process_rasters,sure = TRUE) -arable_1 <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/1.0","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -writeRaster(arable_1, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_1.0.tif") - - - -# Gompertz -keep(process_rasters,sure = TRUE) -arable_gompertz <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/gompertz","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -writeRaster(arable_gompertz, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_gompertz.tif") diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py new file mode 100644 index 0000000..e46aa4a --- /dev/null +++ b/deltap/delta_p_scaled_area.py @@ -0,0 +1,95 @@ +import argparse +import os +import sys +from glob import glob + +import numpy as np +from yirgacheffe.layers import RasterLayer + +SCALE = 1e6 + +def delta_p_scaled_area( + input_path: str, + diff_area_map_path: str, + output_path: str, +): + dirname, basename = os.path.split(output_path) + os.makedirs(dirname, exist_ok=True) + + per_taxa = [ + RasterLayer.layer_from_file(os.path.join(input_path, x)) + for x in sorted(glob("*.tif", root_dir=input_path)) + ] + if not per_taxa: + sys.exit(f"Failed to find any per-taxa maps in {input_path}") + + area_restore = RasterLayer.layer_from_file(diff_area_map_path) + + for layer in per_taxa: + try: + layer.set_window_for_union(area_restore.area) + except ValueError: + layer.set_window_for_intersection(area_restore.area) + + area_restore_filter = area_restore.numpy_apply(lambda c: np.where(c < SCALE, float('nan'), c)) / SCALE + + per_taxa_path = os.path.join(dirname, f"per_taxa_{basename}") + with RasterLayer.empty_raster_layer_like( + area_restore, + filename=per_taxa_path, + nodata=float('nan'), + bands=len(per_taxa) + ) as result: + for idx, inlayer in enumerate(per_taxa): + _, name = os.path.split(inlayer.name) + result._dataset.GetRasterBand(idx+1).SetDescription(name[:-4]) # pylint: disable=W0212 + scaled_filtered_layer = inlayer.numpy_apply( + lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), + area_restore_filter + ) + scaled_filtered_layer.parallel_save(result, band=idx + 1) + + summed_output_path = os.path.join(dirname, f"summed_{basename}") + with RasterLayer.empty_raster_layer_like(area_restore, filename=summed_output_path, nodata=float('nan')) as result: + summed_layer = per_taxa[0] + for layer in per_taxa[1:]: + summed_layer = summed_layer + layer + scaled_filtered_layer = summed_layer.numpy_apply( + lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), + area_restore_filter + ) + scaled_filtered_layer.parallel_save(result) + +def main() -> None: + parser = argparse.ArgumentParser(description="Scale final .") + parser.add_argument( + '--input', + type=str, + help='Path of map of extinction risk', + required=True, + dest='input_path', + ) + parser.add_argument( + '--diffmap', + type=str, + help='Path of map of scenario difference scaled by area', + required=True, + dest='diff_area_map_path', + ) + parser.add_argument( + '--output', + type=str, + help='Path where final map should be stored', + required=True, + dest='output_path', + ) + args = parser.parse_args() + + delta_p_scaled_area( + args.input_path, + args.diff_area_map_path, + args.output_path + ) + +if __name__ == 
"__main__": + main() diff --git a/deltap/gcrgen.py b/deltap/gcrgen.py deleted file mode 100644 index e835c40..0000000 --- a/deltap/gcrgen.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Generate list of args for use with littlejohn and global_code_residents_pixel.py - -Note this works for my file structure specifically to avoid a crazy amount of -messy kwargs, but can be adapted fairly easily. - -""" - -import argparse -import os - -import pandas as pd - -parser = argparse.ArgumentParser(description="") -parser.add_argument('--target_dir', - type=str,help="Look for folders called 'search' in this directory", - required=True,dest="target_dir") -parser.add_argument('--scenario', required=True, dest="scenario") -parser.add_argument('--output_dir', - type = str, help = "where to save the csv", - required = True, dest = "output_dir") -args = vars(parser.parse_args()) - - -classes = ["birds", "mammals", "amphibians", "reptiles"] -z_values = ["gompertz"] -season = "RESIDENT" -habmaps = {"historic" : "pnv", - "scenario" : args["scenario"], - "current" : "current_L1L2" - } -habmaps_r = {v: k for k, v in habmaps.items()} - - -target_dir = args["target_dir"] - -# for z in z_values: -# for taxa in classes: -# os.makedirs(os.path.join(args["output_dir"], args["scenario"], str(z), taxa), exist_ok=True) -os.makedirs(args["output_dir"], exist_ok=True) - -tif_file_paths = [] -for path, subdirs, files in os.walk(args["target_dir"]): - for name in files: - _, ext = os.path.splitext(name) - if ext == '.tif': - tif_file_paths.append(os.path.join(path, name)) - -df = pd.DataFrame() -index_levels = ["taxid", "season", "taxclass"] -df.index = pd.MultiIndex(levels=[[]] * len(index_levels), codes=[[]] * len(index_levels), names=index_levels) - -for i, file in enumerate(tif_file_paths): - # print("Reading in files: ", round(i/len(tif_file_paths), 4), end = "\r" ) - - path, fname = os.path.split(file) - taxid = fname.split("-")[-1].split(".")[0] - season = fname.split("-")[0].split(".")[-1] - c1 = 0 - for tc in classes: - if tc in path: - taxclass = tc - c1 += 1 - c2 = 0 - for hmap in habmaps.values(): - if hmap in path: - habmap = hmap - c2 += 1 - if c1 == 1 and c2 == 1: - df.loc[(taxid, season, taxclass), habmaps_r[habmap]] = file -df = df.reset_index() -if "historic" not in df.columns: - df['historic'] = "nan" - -filename = os.path.join(args["output_dir"], f"g_file_index_{args['scenario']}_lj.csv") -with open(filename, "w+") as out_file: - out_file.write("--current_path,--scenario_path,--historic_path,--output_path,--z") - out_file.write("\n") - - for i, (idx, row) in enumerate(df.iterrows()): - for z in z_values: - print(f"Writing littlejohn arguments to {filename}: ", round(i/len(df), 4), end = "\r" ) - curr = row.current - scen = row.scenario - hist = row.historic - ofname = f"Seasonality.{row.season}-{row.taxid}.tif" - of = os.path.join(args["output_dir"], args["scenario"], str(z), row.taxclass, ofname) - - out_file.write(f"{curr},{scen},{hist},{of},{str(z)}") - out_file.write("\n") - - - \ No newline at end of file diff --git a/deltap/global_code_residents_pixel.py b/deltap/global_code_residents_pixel.py new file mode 100644 index 0000000..5677be9 --- /dev/null +++ b/deltap/global_code_residents_pixel.py @@ -0,0 +1,298 @@ +import argparse +import math +import os +import shutil +import sys +from enum import Enum +from tempfile import TemporaryDirectory + +import geopandas as gpd +import numpy as np +from osgeo import gdal +from yirgacheffe.layers import RasterLayer, ConstantLayer + +GOMPERTZ_A = 2.5 +GOMPERTZ_B = 
-14.5 +GOMPERTZ_ALPHA = 1 + +class Season(Enum): + RESIDENT = 1 + BREEDING = 2 + NONBREEDING = 3 + +def gen_gompertz(x: float) -> float: + return math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) + +def numpy_gompertz(x: float) -> float: + return np.exp(-np.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) + +def open_layer_as_float64(filename: str) -> RasterLayer: + if filename == "nan": + return ConstantLayer(0.0) + layer = RasterLayer.layer_from_file(filename) + if layer.datatype == gdal.GDT_Float64: + return layer + layer64 = RasterLayer.empty_raster_layer_like(layer, datatype=gdal.GDT_Float64) + layer.save(layer64) + return layer64 + +def calc_persistence_value(current_aoh: float, historic_aoh: float, exponent_func) -> float: + sp_p = exponent_func(current_aoh / historic_aoh) + sp_p_fix = np.where(sp_p > 1, 1, sp_p) + return sp_p_fix + +def process_delta_p( + current: RasterLayer, + scenario: RasterLayer, + current_aoh: float, + historic_aoh: float, + exponent_func_raster +) -> RasterLayer: + # In theory we could recalc current_aoh, but given we already have it don't duplicate work + # New section added in: Calculating for rasters rather than csv's + const_layer = ConstantLayer(current_aoh) + calc_1 = (const_layer - current) + scenario + new_aoh = RasterLayer.empty_raster_layer_like(current) + calc_1.save(new_aoh) + + calc_2 = (new_aoh / historic_aoh).numpy_apply(exponent_func_raster) + calc_2 = calc_2.numpy_apply(lambda chunk: np.where(chunk > 1, 1, chunk)) + new_p = RasterLayer.empty_raster_layer_like(new_aoh) + calc_2.save(new_p) + + return new_p + +def global_code_residents_pixel_ae( + species_data_path: str, + current_aohs_path: str, + scenario_aohs_path: str, + historic_aohs_path: str, + exponent: str, + output_folder: str, +) -> None: + os.makedirs(output_folder, exist_ok=True) + + os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" + try: + filtered_species_info = gpd.read_file(species_data_path) + except: # pylint:disable=W0702 + sys.exit(f"Failed to read {species_data_path}") + taxid = filtered_species_info.id_no.values[0] + season = Season[filtered_species_info.season.values[0]] + + try: + exp_val = float(exponent) + z_exponent_func_float = lambda x: np.float_power(x, exp_val) + z_exponent_func_raster = lambda x: np.float_power(x, exp_val) + except ValueError: + if exponent == "gompertz": + z_exponent_func_float = gen_gompertz + z_exponent_func_raster = numpy_gompertz + else: + sys.exit(f"unrecognised exponent {exponent}") + + match season: + case Season.RESIDENT: + filename = f"{taxid}_{season.name}.tif" + try: + current = open_layer_as_float64(os.path.join(current_aohs_path, filename)) + except FileNotFoundError: + print(f"Failed to open current layer {os.path.join(current_aohs_path, filename)}") + sys.exit() + + try: + scenario = open_layer_as_float64(os.path.join(scenario_aohs_path, filename)) + except FileNotFoundError: + # If there is a current but now scenario file it's because the species went extinct under the scenario + scenario = ConstantLayer(0.0) + + try: + historic_aoh = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum() + except FileNotFoundError: + print(f"Failed to open historic layer {os.path.join(historic_aohs_path, filename)}") + sys.exit() + + if historic_aoh == 0.0: + print(f"Historic AoH for {taxid} is zero, aborting") + sys.exit() + + # print(f"current: {current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_aoh.sum()}") + + layers = [current, scenario] + union = RasterLayer.find_union(layers) + 
for layer in layers: + try: + layer.set_window_for_union(union) + except ValueError: + pass + + current_aoh = current.sum() + + new_p_layer = process_delta_p(current, scenario, current_aoh, historic_aoh, z_exponent_func_raster) + print(new_p_layer.sum()) + + old_persistence = calc_persistence_value(current_aoh, historic_aoh, z_exponent_func_float) + print(old_persistence) + calc = new_p_layer - ConstantLayer(old_persistence) + + with TemporaryDirectory() as tmpdir: + tmpfile = os.path.join(tmpdir, filename) + with RasterLayer.empty_raster_layer_like(new_p_layer, filename=tmpfile) as delta_p: + calc.save(delta_p) + shutil.move(tmpfile, os.path.join(output_folder, filename)) + + case Season.NONBREEDING: + nonbreeding_filename = f"{taxid}_{Season.NONBREEDING.name}.tif" + breeding_filename = f"{taxid}_{Season.BREEDING.name}.tif" + + try: + with RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)) as aoh: + historic_aoh_breeding = aoh.sum() + if historic_aoh_breeding == 0.0: + print(f"Historic AoH breeding for {taxid} is zero, aborting") + sys.exit() + except FileNotFoundError: + print(f"Historic AoH for breeding {taxid} not found, aborting") + sys.exit() + try: + with RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)) as aoh: + historic_aoh_non_breeding = aoh.sum() + if historic_aoh_non_breeding == 0.0: + print(f"Historic AoH for non breeding {taxid} is zero, aborting") + sys.exit() + except FileNotFoundError: + print(f"Historic AoH for non breeding {taxid} not found, aborting") + sys.exit() + + + if scenario_aohs_path != "nan": + non_breeding_scenario_path = os.path.join(scenario_aohs_path, nonbreeding_filename) + breeding_scenario_path = os.path.join(scenario_aohs_path, breeding_filename) + else: + non_breeding_scenario_path = "nan" + breeding_scenario_path = "nan" + + try: + current_breeding = open_layer_as_float64(os.path.join(current_aohs_path, breeding_filename)) + except FileNotFoundError: + print(f"Failed to open current breeding {os.path.join(current_aohs_path, breeding_filename)}") + sys.exit() + try: + current_non_breeding = open_layer_as_float64(os.path.join(current_aohs_path, nonbreeding_filename)) + except FileNotFoundError: + print(f"Failed to open current non breeding {os.path.join(current_aohs_path, nonbreeding_filename)}") + sys.exit() + try: + scenario_breeding = open_layer_as_float64(breeding_scenario_path) + except FileNotFoundError: + # If there is a current but now scenario file it's because the species went extinct under the scenario + scenario_breeding = ConstantLayer(0.0) + try: + scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) + except FileNotFoundError: + # If there is a current but now scenario file it's because the species went extinct under the scenario + scenario_non_breeding = ConstantLayer(0.0) + + layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding] + union = RasterLayer.find_union(layers) + for layer in layers: + try: + layer.set_window_for_union(union) + except ValueError: + pass + + current_aoh_breeding = current_breeding.sum() + persistence_breeding = calc_persistence_value( + current_aoh_breeding, + historic_aoh_breeding, + z_exponent_func_float + ) + + current_aoh_non_breeding = current_non_breeding.sum() + persistence_non_breeding = calc_persistence_value( + current_aoh_non_breeding, + historic_aoh_non_breeding, + z_exponent_func_float + ) + + old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5) + 
+ new_p_breeding = process_delta_p( + current_breeding, + scenario_breeding, + current_aoh_breeding, + historic_aoh_breeding, + z_exponent_func_raster + ) + new_p_non_breeding = process_delta_p( + current_non_breeding, + scenario_non_breeding, + current_aoh_non_breeding, + historic_aoh_non_breeding, + z_exponent_func_raster + ) + new_p_layer = (new_p_breeding ** 0.5) * (new_p_non_breeding ** 0.5) + + delta_p_layer = new_p_layer - ConstantLayer(old_persistence) + + with TemporaryDirectory() as tmpdir: + tmpfile = os.path.join(tmpdir, nonbreeding_filename) + with RasterLayer.empty_raster_layer_like(new_p_breeding, filename=tmpfile) as output: + delta_p_layer.save(output) + shutil.move(tmpfile, os.path.join(output_folder, nonbreeding_filename)) + + case Season.BREEDING: + pass # covered by the nonbreeding case + case _: + sys.exit(f"Unexpected season for species {taxid}: {season}") + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + '--speciesdata', + type=str, + help="Single species/seasonality geojson", + required=True, + dest="species_data_path" + ) + parser.add_argument( + '--current_path', + type=str, + required=True, + dest="current_path", + help="path to species current AOH hex" + ) + parser.add_argument( + '--scenario_path', + type=str, + required=True, + dest="scenario_path", + help="path to species scenario AOH hex" + ) + parser.add_argument( + '--historic_path', + type=str, + required=False, + dest="historic_path", + help="path to species historic AOH hex" + ) + parser.add_argument('--output_path', + type=str, + required=True, + dest="output_path", + help="path to save output csv" + ) + parser.add_argument('--z', dest='exponent', type=str, default='0.25') + args = parser.parse_args() + + global_code_residents_pixel_ae( + args.species_data_path, + args.current_path, + args.scenario_path, + args.historic_path, + args.exponent, + args.output_path, + ) + +if __name__ == "__main__": + main() diff --git a/deltap/global_code_residents_pixel_AE_64.py b/deltap/global_code_residents_pixel_AE_64.py deleted file mode 100644 index 0743885..0000000 --- a/deltap/global_code_residents_pixel_AE_64.py +++ /dev/null @@ -1,219 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed May 17 15:51:13 2023 - -@authors: Thomas Ball, Ali Eyres - -This is a modified version of global_code_residents.py that calculates delta p for a set of -Rasters. Any resolution should work since it just uses x/y as identifiers. - -# AE modified from TB's Code for CSVs -# Not sure if it works... 
- -This isn't tidied or commented properly -""" -import argparse -import os -import math -import re -import sys -import warnings - -# warnings.simplefilter("error") - -import pandas as pd -import numpy as np -from osgeo import gdal -from yirgacheffe.layers import RasterLayer, ConstantLayer - - -quiet = False -overwrite = True - -gompertz_a = 2.5 -gompertz_b = -14.5 -gompertz_alpha = 1 - -def gen_gompertz(x,): - return math.exp(-math.exp(gompertz_a + (gompertz_b * (x ** gompertz_alpha)))) - -def numpy_gompertz(x): - return np.exp(-np.exp(gompertz_a + (gompertz_b * (x ** gompertz_alpha)))) - -parser = argparse.ArgumentParser() -parser.add_argument( - '--current_path', - type=str, - required=True, - dest="current_path", - help="path to species current AOH hex" -) -parser.add_argument( - '--historic_path', - type=str, - required=False, - dest="historic_path", - help="path to species historic AOH hex" -) -parser.add_argument( - '--scenario_path', - type=str, - required=True, - dest="scenario_path", - help="path to species scenario AOH hex" -) -parser.add_argument('--output_path', - type=str, - required=True, - dest="output_path", - help="path to save output csv" -) -parser.add_argument('--z', dest='exponent', type=str, default='0.25') -parser.add_argument('-ht', '--hist_table', - dest = "hist_table", - type = str) -args = vars(parser.parse_args()) - -try: - exp_val = float(args['exponent']) - z_exponent_func_float = lambda x: np.float_power(x, exp_val) - z_exponent_func_raster = lambda x: np.float_power(x, exp_val) -except ValueError: - if args['exponent'] == "gompertz": - z_exponent_func_float = gen_gompertz - z_exponent_func_raster = numpy_gompertz - else: - quit(f"unrecognised exponent {args['exponent']}") - -if (not 'historic_path' in args.keys()) and (not 'hist_table' in args.keys()): - quit("Please provide either historic_path or hist_table arguments") - -if not overwrite and os.path.isfile(args['output_path']): - quit(f"{args['output_path']} exists, set overwrite to False to ignore this.") -path, _ = os.path.split(args["output_path"]) -os.makedirs(path, exist_ok=True) - -FILERE = re.compile(r'.*Seasonality.(\w+)-(\d+).tif$') -season, taxid = FILERE.match(args['current_path']).groups() -season = season.lower() - - -def open_layer_as_float64(filename: str) -> RasterLayer: - if filename == "nan": - return ConstantLayer(0.0) - layer = RasterLayer.layer_from_file(filename) - if layer.datatype == gdal.GDT_Float64: - return layer - layer64 = RasterLayer.empty_raster_layer_like(layer, datatype=gdal.GDT_Float64) - layer.save(layer64) - return layer64 - - -def calc_persistence_value(current_AOH: float, historic_AOH: float, exponent_func) -> float: - sp_P = exponent_func(current_AOH / historic_AOH) - sp_P_fix = np.where(sp_P > 1, 1, sp_P) - return sp_P_fix - - -def process_delta_p(current: RasterLayer, scenario: RasterLayer, current_AOH: float, historic_AOH: float) -> RasterLayer: - # In theory we could recalc current_AOH, but given we already have it don't duplicate work - # New section added in: Calculating for rasters rather than csv's - const_layer = ConstantLayer(current_AOH) # MAKE A LAYER WITH THE SAME PROPERTIES AS CURRENT AOH RASTER BUT FILLED WITH THE CURRENT AOH - calc_1 = (const_layer - current) + scenario # FIRST CALCULATION : NEW AOH - new_AOH = RasterLayer.empty_raster_layer_like(current) - calc_1.save(new_AOH) - - calc_2 = (new_AOH / historic_AOH).numpy_apply(z_exponent_func_raster) - calc_2 = calc_2.numpy_apply(lambda chunk: np.where(chunk > 1, 1, chunk)) - new_p = 
RasterLayer.empty_raster_layer_like(new_AOH) - calc_2.save(new_p) - - return new_p - - -hdf = pd.read_csv(args['hist_table']) - -if season == 'resident': - try: - current = open_layer_as_float64(args['current_path']) - scenario = open_layer_as_float64(args['scenario_path']) - except FileNotFoundError as fnf: - quit(f"Failed to open {fnf.filename}") - - layers = [current, scenario] - union = RasterLayer.find_union(layers) - for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass - - current_AOH = current.sum() - historic_AOH = hdf[(hdf.id_no == int(taxid))&(hdf.season == " " + season)].AOH.values[0] - if historic_AOH == 0.0: - quit(f"Historic AoH for {taxid} is zero, aborting") - - new_p_layer = process_delta_p(current, scenario, current_AOH, historic_AOH) - - old_persistence = calc_persistence_value(current_AOH, historic_AOH, z_exponent_func_float) - calc = new_p_layer - ConstantLayer(old_persistence) - delta_p = RasterLayer.empty_raster_layer_like(new_p_layer, filename=args['output_path']) - calc.save(delta_p) - -elif season == 'nonbreeding': - # We have the nonbreeding path, work out the breeding path, check that works, and then do the work. - non_breeding_current_path = args['current_path'] - directory, _ = os.path.split(non_breeding_current_path) - breeding_current_path = os.path.join(directory, f'Seasonality.BREEDING-{taxid}.tif') - - non_breeding_scenario_path = args['scenario_path'] - if non_breeding_scenario_path != "nan": - assert 'NONBREEDING' in non_breeding_scenario_path - directory, _ = os.path.split(non_breeding_scenario_path) - breeding_scenario_path = os.path.join(directory, f'Seasonality.BREEDING-{taxid}.tif') - else: - breeding_scenario_path = non_breeding_scenario_path - - try: - current_breeding = open_layer_as_float64(breeding_current_path) - current_non_breeding = open_layer_as_float64(non_breeding_current_path) - scenario_breeding = open_layer_as_float64(breeding_scenario_path) - scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) - except FileNotFoundError as fnf: - quit(f"Failed to open {fnf.filename}") - - layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding] - union = RasterLayer.find_union(layers) - for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass - - - current_AOH_breeding = current_breeding.sum() - historic_AOH_breeding = hdf[(hdf.id_no == int(taxid))&(hdf.season == " " + 'breeding')].AOH.values[0] - if historic_AOH_breeding == 0.0: - quit(f"Historic AoH breeding for {taxid} is zero, aborting") - persistence_breeding = calc_persistence_value(current_AOH_breeding, historic_AOH_breeding, z_exponent_func_float) - - current_AOH_non_breeding = current_non_breeding.sum() - historic_AOH_non_breeding = hdf[(hdf.id_no == int(taxid))&(hdf.season == " " + 'nonbreeding')].AOH.values[0] - if historic_AOH_non_breeding == 0.0: - quit(f"Historic AoH for non breeding {taxid} is zero, aborting") - persistence_non_breeding = calc_persistence_value(current_AOH_non_breeding, historic_AOH_non_breeding, z_exponent_func_float) - - old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5) - print(old_persistence) - - new_p_breeding = process_delta_p(current_breeding, scenario_breeding, current_AOH_breeding, historic_AOH_breeding) - new_p_non_breeding = process_delta_p(current_non_breeding, scenario_non_breeding, current_AOH_non_breeding, historic_AOH_non_breeding) - - new_p_layer = (new_p_breeding ** 0.5) * 
(new_p_non_breeding ** 0.5) - - delta_p_layer = new_p_layer - ConstantLayer(old_persistence) - - output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=args['output_path']) - delta_p_layer.save(output) - - print(delta_p_layer.sum()) \ No newline at end of file diff --git a/downsample.py b/downsample.py deleted file mode 100644 index 2376a72..0000000 --- a/downsample.py +++ /dev/null @@ -1,84 +0,0 @@ -from math import ceil, floor -import sys - -import numpy as np -from yirgacheffe.layers import RasterLayer, PixelScale - -from osgeo import gdal - -gdal.SetCacheMax(1024 * 1024 * 16) - -target_scale = PixelScale(0.083333333333333, -0.083333333333333) - -try: - source = RasterLayer.layer_from_file(sys.argv[1]) - target_name = sys.argv[2] # pylint: disable=C0103 -except IndexError: - print(f"Usage: {sys.argv[0]} [SRC] [DEST]", file=sys.stderr) - sys.exit(1) - -target = RasterLayer.empty_raster_layer( - area=source.area, - scale=target_scale, - datatype=source.datatype, - filename=target_name, - projection=source.projection -) - -pixels_per_x = source.window.xsize / target.window.xsize -pixels_per_y = source.window.ysize / target.window.ysize - -for y in range(target.window.ysize): - # read all the pixels that will overlap with this row from source - low_y = floor(y * pixels_per_y) - high_y = ceil((y+1) * pixels_per_y) - - band_height = high_y - low_y - band = source.read_array(0, low_y, source.window.xsize, high_y - low_y) - - dest = np.zeros((1, target.window.xsize)) - for x in range(target.window.xsize): - - low_x = floor(x * pixels_per_x) - high_x = ceil((x+1) * pixels_per_x) - - total = np.sum(band[1:band_height - 1, low_x+1:high_x - 1]) - - - # Work out the scaling factors for the sides - - first_y = float(low_y + 1) - (y * pixels_per_y) - assert 0.0 <= first_y <= 1.0 - last_y = ((y + 1) * pixels_per_y) - float(high_y - 1) - assert 0.0 <= last_y <= 1.0 - - first_x = float(low_x + 1) - (x * pixels_per_x) - assert 0.0 <= first_x <= 1.0 - last_x = ((x + 1) * pixels_per_x) - float(high_x - 1) - assert 0.0 <= last_x <= 1.0 - - # major sides - total += np.sum(band[1:band_height - 1, low_x:low_x+1]) * first_y - total += np.sum(band[1:band_height - 1, high_x - 2:high_x - 1]) * last_y - - total += np.sum(band[0][low_x+1:high_x - 1]) * first_x - total += np.sum(band[band_height - 1][low_x + 1:high_x - 1]) * last_x - - # corners - total += band[0][low_x] * first_x * first_y - total += band[band_height - 1][low_x] * first_x * last_y - total += band[0][high_x - 1] * last_x * first_y - total += band[band_height - 1][high_x - 1] * last_x * last_y - - dest[0][x] = total - - - target._dataset.GetRasterBand(1).WriteArray(dest, 0, y) # pylint: disable=W0212 - - -before = source.sum() -after = target.sum() - -print(f"before: {before}") -print(f"after: {after}") -print(f"diff: {((after - before)/before) * 100.0}") diff --git a/flow.yml b/flow.yml deleted file mode 100644 index c56852d..0000000 --- a/flow.yml +++ /dev/null @@ -1,275 +0,0 @@ -IUCN-download: - type: "download" - author: - - ae491 - synopsis: "Download IUCN data from website" - outputs: - - IUCN raw data - -IUCN-importer: - type: "program" - author: - - mb - inputs: - - IUCN raw data - synopsis: "Takes raw IUCN bulk download and turns it to gpkg" - outputs: - - IUCN refined data - -IUCN-filter: - type: "program" - synopsis: "filter the cleaned up IUCN data for the actual experiment" - author: - - ae491 - - mwd24 - inputs: - - IUCN refined data - outputs: - - IUCN experiment data - -"habitat": - type: "group" - children: - 
habitat-download-current: - type: "download" - author: - - ae491 - ninputs: - - experiment-config - outputs: - - habitat-map-current - - habitat-download-historic: - type: "download" - author: - - ae491 - ninputs: - - experiment-config - outputs: - - habitat-map-historic - - habitat-generate-restore: - type: "program" - author: - - ae491 - inputs: - - habitat-map-current - outputs: - - habitat-map-restore - - habitat-generate-arable: - type: "program" - author: - - ae491 - inputs: - - habitat-map-current - outputs: - - habitat-map-arable - -area-download: - type: "download" - author: - - ae491 - outputs: - - area-file - -area-data-optimize: - type: "program" - author: - - mwd24 - inputs: - - area-file - outputs: - - tiny-area-file - -elevation-download: - type: "download" - author: - - ae491 - outputs: - - elevation-map - - - -"AoH calc": - type: "group" - children: - - speciesgenerator.py: - type: "program" - author: - - mwd24 - synopsis: "Generates the species/seasonality list per project." - inputs: - - IUCN experiment data - outputs: - - species-season-job-list - - "aohcalc.py current_L1L2": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-current - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH-raster current_L1L2*" - - "aohcalc.py PNV": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-historic - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH PNV CSV" - - "aohcalc.py arable": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-arable - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH-raster arable*" - - "aohcalc.py restore": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-restore - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH-raster restore*" - - - "downsample.py current_L1L2": - type: "program" - name: "downsample.py" - author: - - mwd24 - synopsis: "Takes the Jung scale rasters and reduces them to 1 arc minute" - inputs: - - "AoH-raster current_L1L2" - outputs: - - "AoH-small-raster current_L1L2*" - - "downsample.py restore": - type: "program" - name: "downsample.py" - author: - - mwd24 - synopsis: "Takes the Jung scale rasters and reduces them to 1 arc minute" - inputs: - - "AoH-raster restore" - outputs: - - "AoH-small-raster restore*" - - "downsample.py arable": - type: "program" - name: "downsample.py" - author: - - mwd24 - synopsis: "Takes the Jung scale rasters and reduces them to 1 arc minute" - inputs: - - "AoH-raster arable" - outputs: - - "AoH-small-raster arable*" - - - -"final": - type: "groups" - names: - - "z=0.1" - - "z=0.25" - - "z=0.5" - - "z=1.0" - - "z=gompertz" - children: - "gcrgen.py arable": - type: "program" - name: "gcrgen.py" - author: - - tsb42 - inputs: - - "AoH-small-raster arable" - outputs: - - deltap-job-list-arable - - "gcrgen.py restore": - type: "program" - name: "gcrgen.py" - author: - - tsb42 - inputs: - - "AoH-small-raster restore" - outputs: - - 
deltap-job-list-restore - - "generate-difference-tifs arable": - type: "littlejohn" - author: - - mwd24 - - ae491 - - tsb42 - inputs: - - deltap-job-list-arable - - "AoH-small-raster arable" - - "AoH-small-raster current_L1L2" - - "AoH PNV CSV" - outputs: - - "per-species-delta-p-tif arable*" - - "generate-difference-tifs restore": - type: "littlejohn" - author: - - mwd24 - - ae491 - - tsb42 - inputs: - - deltap-job-list-restore - - "AoH-small-raster restore" - - "AoH-small-raster current_L1L2" - - "AoH PNV CSV" - outputs: - - "per-species-delta-p-tif restore*" - - "generate-deltap-per-taxa": - type: "program" - synopsis: "aka raster_sum.py" - inputs: - - "per-species-delta-p-tif arable" - outputs: - - delta-p-per-taxa-arable.tif - - "generate-deltap-per-taxa": - type: "program" - synopsis: "aka raster_sum.py" - inputs: - - "per-species-delta-p-tif restore" - outputs: - - delta-p-per-taxa-restore.tif diff --git a/h3calculate.py b/h3calculate.py deleted file mode 100644 index 53f3a8b..0000000 --- a/h3calculate.py +++ /dev/null @@ -1,375 +0,0 @@ -import itertools -import json -import math -import os -import re -import subprocess -import sys -import time -from multiprocessing import Pool, cpu_count - -import h3 -import pandas as pd -import pyarrow as pa -import pyarrow.parquet as pq -from osgeo import ogr -from yirgacheffe.layers import RasterLayer, VectorLayer, H3CellLayer -from yirgacheffe.window import PixelScale - -try: - COMMIT = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip() - if len(subprocess.check_output('git diff -q', shell=True)) != 0: - COMMIT += '*' -except subprocess.CalledProcessError: - COMMIT = 'unknown' - -# This regular expression is how we get the species ID from the filename -FILERE = re.compile(r'^Seasonality.(\w+)-(\d+).tif$') - -MAG = 7 - -LONG_BAND_WIDTH = 1.0 - -def threads() -> int: - return cpu_count() - -def geometry_to_pointlist(geo): - points = [] - for i in range(geo.GetPointCount()): - point = geo.GetPoint(i) - points.append((point[1], point[0])) - return points - -def geometry_to_polygons(geo, subdivide=True): - geotype = geo.GetGeometryType() - - if geotype == ogr.wkbMultiPolygon: - count = geo.GetGeometryCount() - polygons = [] # [None] * count - for i in range(count): - subgeometry = geo.GetGeometryRef(i) - subpolys = geometry_to_polygons(subgeometry) - assert len(subpolys) == 1 - polygon = subpolys[0] - - # envelope is (long_left, long_right, lat_bottom, lat_top) - envelope = subgeometry.GetEnvelope() - longitude_width = envelope[1] - envelope[0] - if (longitude_width < LONG_BAND_WIDTH) or not subdivide: - polygons.append(polygon) - continue - - # This poly is quite wide, so split it into smaller chunks - # OGR is quite slow (relative to the test of the work here) - # so we just do a simple lat banding - try: - slices = [] - for i in range(math.ceil(longitude_width / LONG_BAND_WIDTH)): - left = envelope[0] + (i * LONG_BAND_WIDTH) - right = envelope[0] + ((i + 1) * LONG_BAND_WIDTH) - frame = { - 'type': 'POLYGON', - 'coordinates': [ - [ - [left, envelope[3]], - [right, envelope[3]], - [right, envelope[2]], - [left, envelope[2]], - [left, envelope[3]], - ] - ] - } - band_geometry = ogr.CreateGeometryFromJson(json.dumps(frame)) - if band_geometry is None: - raise ValueError("Failed to create mask for slicing") - intersection = subgeometry.Intersection(band_geometry) - if intersection is None: - raise ValueError("Failed to create intersection") - if not intersection.IsEmpty(): - slices.append(intersection) - - for 
intersection in slices: - polygons += geometry_to_polygons(intersection, subdivide=False) - except ValueError: - # In rare cases it seems OGR doesn't like the original geometry for - # creating an intersection. I've seen errors like: - # - # ERROR 1: TopologyException: Input geom 0 is invalid: Ring Self-intersection at or near point... - # - # and the general advice I've seen is to keep fudging geometries until it - # works, which isn't a scalable solution. Instead we just take the hit and turn the entire - # polygon into hextiles in a single pass. - polygons.append(polygon) - - return polygons - - elif geotype == ogr.wkbPolygon: - points = [] - for i in range(geo.GetGeometryCount()): - points.append(geometry_to_pointlist(geo.GetGeometryRef(i))) - polygon = h3.Polygon(*points) # pylint: disable=E1120 - return [polygon] - - elif geotype == ogr.wkbGeometryCollection: - count = geo.GetGeometryCount() - polygons = [] - for i in range(count): - polygons += geometry_to_polygons(geo.GetGeometryRef(i), subdivide=False) - return polygons - - elif geotype == ogr.wkbPoint: - print(geo) - return [] - - else: - raise ValueError(f"unknown type {geotype}: {geo.GetGeometryName()}") - -def polygon_to_tiles(polygon): - - list_of_tiles = [] - - # First we get all the cells with a mid point within the polygon - try: - tiles = h3.polygon_to_cells(polygon, MAG) - list_of_tiles.append(tiles) - except MemoryError: - # It seems that in some rare cases we have generated very narrow slices as a result of the - # fragmenting we do, and that causes h3 to get super confused and run out of memory. This - # is most likely a bug in h3 but I can't say why currently, and don't have more time right - # now to dig in. Thankfully though, because of the second stage in this method where we - # expand the boundary of the polygon, this kinda fixes skipping the polygon_to_cells in - # all cases I've seen, so we at least have a temporary work around. - pass - - # now for every vertice on the polygon, work use the minimum distance path to approximate - # all cells on the boundary - polygons = [polygon.outer] + list(polygon.holes) - for loop in polygons: - if loop[0] != loop[-1]: - loop.append(loop[0]) - for i in range(len(loop) - 1): - start = loop[i] - end = loop[i + 1] - start_cell = h3.latlng_to_cell(*start, MAG) - end_cell = h3.latlng_to_cell(*end, MAG) - - line = [start_cell, end_cell] - - if start_cell != end_cell: - try: - distance_estimate = h3.grid_distance(start_cell, end_cell) - except Exception as exc: # pylint: disable=W0718 - # if the distance is too far then h3 will give up - # this is usually along the boundaries added by - # the chunking we do to let us parallelise things, and so - # we don't mind, as the polygon_to_cell is sufficient there - print(f'Failed to find path from {start} to {end}: {exc}') - continue - - # In an ideal world we'd use h3.grid_path_cells() for this, but in some places - # we observe that this does not take the direct route, and the docs do not - # say that it'll produce an optimal output, nor that the results are stable. 
- # Instead we do this approximation by hand, which isn't guaranteed to generate - # a contiguous line of cells, but is good enough, particularly once we add - # cell padding, which we did anyway even on the original implementation that - # had h3.grid_path_cells() - if distance_estimate: - diffs = ( - (end[0] - start[0]) / float(distance_estimate), - (end[1] - start[1]) / float(distance_estimate) - ) - for i in range(distance_estimate): - here = (start[0] + (diffs[0] * i), start[1] + (diffs[1] * i)) - cell = h3.latlng_to_cell(*here, MAG) - assert h3.is_valid_cell(cell) - line.append(cell) - else: - line = h3.grid_path_cells( - h3.latlng_to_cell(*start, MAG), - h3.latlng_to_cell(*end, MAG) - ) - - list_of_tiles.append(line) - for cell in line: - list_of_tiles.append(h3.grid_disk(cell, 3)) - - - tiles = itertools.chain.from_iterable(list_of_tiles) - - return tiles - -def process_cell(args): - aoh_layer_path, tile = args - - # Load the raster of total aoh of species - aoh_layer = RasterLayer.layer_from_file(aoh_layer_path) - - # create a layer the H3 cell of interest - tile_layer = H3CellLayer(tile, aoh_layer.pixel_scale, aoh_layer.projection) - - # calculate intersection - layers = [aoh_layer, tile_layer] - try: - intersection = RasterLayer.find_intersection(layers) - except ValueError: - return (tile, 0.0) - for layer in layers: - try: - layer.set_window_for_intersection(intersection) - except: - print(f'Failed to intersect {tile} with for {layer} with area {layer.area} and {intersection}') - raise - - # work out area of habitate contributed by just that cell - calc = aoh_layer * tile_layer - try: - tile_aoh = calc.sum() - except: - print(f' Failed to process {tile} with {intersection} at scale {aoh_layer.pixel_scale}') - raise - - return (tile, tile_aoh) - - -def tiles_to_area(aoh_layer_path, species_id, tiles, target_file, timestamp_2): - # we now have all the tiles, so work out the AoH in just that tile - results = [] - args = [(aoh_layer_path, tile) for tile in tiles] - - with Pool(processes=threads()) as pool: - results = pool.map(process_cell, args) - - timestamp_3 = time.time() - print(f"Processed {len(tiles)} tiles in {timestamp_3 - timestamp_2} seconds "\ - "- {float(len(tiles)) / (timestamp_3 - timestamp_2)} tiles per second") - - dataframe = pd.DataFrame(results, columns=['cell', 'area']) - table = pa.Table.from_pandas(dataframe).replace_schema_metadata({ - b'experiment': json.dumps({ - 'species': species_id, - 'source': aoh_layer_path, - 'user': os.environ['USER'], - 'timestamp': time.time(), - 'host': os.uname()[1], - 'src': __file__, - 'commit': COMMIT, - }).encode('utf8') - }) - pq.write_table(table, target_file, compression='GZIP') - - return dataframe.loc[:, 'area'].sum() - - -def get_original_aoh_info(raster_path: str) -> float: - aoh_layer = RasterLayer.layer_from_file(raster_path) - return aoh_layer.sum() - - -def get_range_polygons(range_path, species_id): - where_filter = f"id_no = {species_id} and season = 'resident'" - - # The pixel scale and projection don't matter here, as we're just - # abusing yirgacheffe to load the range file. 
Feels like I need to split this - # out at some point - layer = VectorLayer(range_path, where_filter, PixelScale(0.1, 0.1), "UNUSED") - range_layer = layer.layer - range_layer.ResetReading() - polygons = [] - feature = range_layer.GetNextFeature() - while feature: - geo = feature.GetGeometryRef() - polygons.append(geometry_to_polygons(geo, subdivide=True)) - feature = range_layer.GetNextFeature() - return list(itertools.chain.from_iterable(polygons)) - - -def main() -> None: - if len(sys.argv) != 5: - print(f'Usage: {sys.argv[0]} [AoH raster directory] [Range file] [Output directory] [Direction]') - sys.exit(1) - - current_rasters_dir = sys.argv[1] - range_file = sys.argv[2] - output_dir = sys.argv[3] - direction = sys.argv[4] - - print(direction) - try: - os.makedirs(output_dir, exist_ok=True) - except FileExistsError: - print(f'Could not create {output_dir} as file is there') - sys.exit(1) - except PermissionError: - print(f'Could not create {output_dir} due to permissions') - sys.exit(1) - - species_list = [FILERE.match(x).groups() for x in os.listdir(current_rasters_dir) if FILERE.match(x)] - - if direction == "forward": - print("Running H3 forward") - species_list.sort() - elif direction == "reverse": - print("Running H3 backwards") - species_list.sort(reverse=True) - - # for test run, just do first dozen - for season, species_id in species_list: - print(species_id, season) - - file_prefix = season.lower()[:3] - - # Due to errors as we find new corner cases, we keep having to restart the script - # so we don't overwrite old results and just keep moving on. - old_target_file = os.path.join(output_dir, f'{file_prefix}_{species_id}_{MAG}.csv') - target_file = os.path.join(output_dir, f'{file_prefix}_{species_id}_{MAG}.parquet') - if os.path.exists(target_file) or os.path.exists(old_target_file): - print('Species result exists, skipping') - continue - - start = time.time() - aoh_layer_path = os.path.join(current_rasters_dir, f'Seasonality.{season}-{species_id}.tif') - - # We can't currently parallelise either of these tasks, but they are independant, so we can - # at least run them concurrently... 
- try: - with Pool(processes=threads()) as pool: - res_aoh_total = pool.apply_async(get_original_aoh_info, (aoh_layer_path,)) - res_polygons = pool.apply_async(get_range_polygons, (range_file, species_id)) - - aoh_layer_total = res_aoh_total.get() - polygons = res_polygons.get() - except (FileNotFoundError, TypeError): - print(f'Failed to load raster for {species_id}, skipping') - continue - except ValueError: - print(f'Species {species_id} had bad range, skipping') - continue - - if aoh_layer_total == 0.0: - print(f'Skipping species, as AoH is {aoh_layer_total}') - continue - - timestamp_1 = time.time() - print(f"Found {len(polygons)} polygons in {timestamp_1 - start} seconds") - - # The h3 lookup can be ran concurrently thought - tiles = set() - with Pool(processes=threads()) as pool: - results = pool.map(polygon_to_tiles, polygons) - tiles = set(itertools.chain.from_iterable(results)) - - timestamp_2 = time.time() - print(f"Found {len(tiles)} tiles in {timestamp_2 - timestamp_1} seconds") - - total = tiles_to_area(aoh_layer_path, species_id, tiles, target_file, timestamp_2) - diff = ((total - aoh_layer_total) / aoh_layer_total) * 100.0 - if f'{abs(diff):.5f}' != '0.00000': - print(f'AoH layer total: {aoh_layer_total}') - print(f'Hex tile total: {total}') - print(f'Error is {diff:.5f}%') - - end = time.time() - print(f'{species_id} at mag {MAG} took {end - start} seconds') - -if __name__ == "__main__": - main() diff --git a/method.md b/method.md new file mode 100644 index 0000000..1f6af8d --- /dev/null +++ b/method.md @@ -0,0 +1,355 @@ +--- +path: /root +TAXA: +- AMPHIBIA +- AVES +CURVE: +- "0.25" +--- + +# How to run the pipeline for LIFE + +From [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33). + +## Build the environment + +### The geospatial compute container + +The dockerfile that comes with the repo should be used to run the pipeline. + +``` +docker build buildx --tag aohbuilder . +``` + +For use with the [shark pipeline](https://github.com/quantifyearth/shark), we need this block to trigger a build currently: + +```shark-build:aohbuilder +((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5) + (run (network host) (shell "apt-get update -qqy && apt-get -y install python3-pip libpq-dev git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*")) + (run (network host) (shell "pip install --upgrade pip")) + (run (network host) (shell "pip install 'numpy<2'")) + (run (network host) (shell "pip install gdal[numpy]==3.8.5")) + (run (shell "mkdir -p /root")) + (workdir "/root") + (copy (src "requirements.txt") (dst "./")) + (copy (src "aoh-calculator") (dst "./")) + (run (network host) (shell "pip install --no-cache-dir -r requirements.txt")) +) +``` + +For the primary data sources we fetch them directly from Zenodo/GitHub to allow for obvious provenance. + +```shark-build:reclaimer +((from carboncredits/reclaimer:latest)) +``` + +For the projection changes we use a barebones GDAL container. The reason for this is that these operations are expensive, and we don't want to re-execute them if we update our code. + +```shark-build:gdalonly +((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5)) +``` + +Alternatively you can build your own python virtual env assuming you have everything required. For this you will need at least a GDAL version installed locally, and you may want to update requirements.txt to match the python GDAL bindings to the version you have installed. + +``` +python3 -m virtualenv ./venv +. 
./venv/bin/activate
+pip install -r requirements.txt
+```
+
+### The PostGIS container
+
+For querying the IUCN data held in the PostGIS database we use a separate container with Python, the PostgreSQL client libraries, and geopandas installed.
+
+```shark-build:postgis
+((from python:3.12-slim)
+ (run (network host) (shell "apt-get update -qqy && apt-get -y install libpq-dev gcc git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*"))
+ (run (network host) (shell "pip install psycopg2 postgis geopandas"))
+ (run (network host) (shell "pip install git+https://github.com/quantifyearth/pyshark"))
+ (copy (src "./prepare-species") (dst "/root/"))
+ (workdir "/root/")
+)
+```
+
+## Fetching the required data
+
+```shark-build:layer-prep
+((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5)
+ (run (network host) (shell "apt-get update -qqy && apt-get -y install python3-pip libpq-dev git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*"))
+ (run (network host) (shell "pip install --upgrade pip"))
+ (run (network host) (shell "pip install 'numpy<2'"))
+ (run (network host) (shell "pip install gdal[numpy]==3.8.5"))
+ (run (shell "mkdir -p /root"))
+ (workdir "/root")
+ (copy (src "requirements.txt") (dst "./"))
+ (copy (src "aoh-calculator") (dst "./"))
+ (run (network host) (shell "pip install --no-cache-dir -r requirements.txt"))
+ (copy (src "prepare-layers") (dst "./"))
+)
+```
+
+To calculate the AoH we need various basemaps:
+
+- Habitat maps for four scenarios:
+    - Current day, in both L1 and L2 IUCN habitat classification
+    - Potential Natural Vegetation (PNV), showing the habitats predicted without human intervention
+    - Restore scenario - a map derived from the PNV and current maps, showing certain lands restored to their pre-human state
+    - Conserve scenario - a map derived from the current map, indicating the impact of converting non-urban land to arable
+- The Digital Elevation Map (DEM), which gives the height per pixel in meters
+
+All these maps must share the same pixel spacing and projection, and the output AoH maps will be generated at that same resolution and projection.
+
+Habitat maps typically store habitat classes as integer values, whereas the IUCN habitat codes for species are of the form 'x.y' or 'x.y.z', so you will also need a crosswalk table that maps between the IUCN codes used for the species and the values used in the particular habitat map.
+
+### Fetching the habitat maps
+
+LIFE uses the work of Jung et al to get both the [current day habitat map](https://zenodo.org/records/4058819) and the [PNV habitat map](https://zenodo.org/records/4038749).
+
+To assist with provenance, we download the data from the Zenodo ID.
+
+```shark-run:reclaimer
+reclaimer zenodo --zenodo_id 4038749 \
+    --filename pnv_lvl1_004.zip \
+    --extract \
+    --output /data/habitat/pnv_raw.tif
+reclaimer zenodo --zenodo_id 4058819 \
+    --filename iucn_habitatclassification_composite_lvl2_ver004.zip \
+    --extract \
+    --output /data/habitat/jung_l2_raw.tif
+```
+
+For LIFE the crosswalk table is generated using code from Daniele Baisero's [IUCN Modlib](https://gitlab.com/daniele.baisero/iucn-modlib/) package:
+
+```shark-run:layer-prep
+python3 ./prepare-layers/generate_crosswalk.py --output /data/crosswalk.csv
+```
+
+The PNV map is only classified at Level 1 of the IUCN habitat codes, so to match this the non-artificial habitats in the L2 map are converted to Level 1, as per Eyres et al:
+
+| The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped only at IUCN level 1, so to estimate species’ proportion of original AOH now remaining we could only use natural habitats mapped at level 1 and artificial habitats at level 2.
+
+```shark-run:layer-prep
+python3 ./prepare-layers/make_current_map.py --jung /data/habitat/jung_l2_raw.tif \
+    --crosswalk /data/crosswalk.csv \
+    --output /data/habitat/current_raw.tif \
+    -j 16
+```
+
+The habitat map by Jung et al is at 100m resolution in the World Behrmann projection, and for IUCN-compatible AoH maps we use Mollweide at 1km resolution, so we use GDAL to do the resampling for this:
+
+```shark-run:layer-prep
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/pnv_raw.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/pnv/
+```
+
+```shark-run:layer-prep
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/current_raw.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/current/
+```
+
+### Generating additional habitat maps
+
+LIFE calculates the impact on extinction rates under two future scenarios: restoration of habitats to their pre-human state, and the conversion of non-urban terrestrial habitat to arable.
+
+The definition of the restore layer from Section 5 of [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is:
+
+| In the restoration scenario all areas classified as arable or pasture were restored to their PNV.
+
+```shark-run:layer-prep
+python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \
+    --current /data/habitat/current_raw.tif \
+    --crosswalk /data/crosswalk.csv \
+    --output /data/habitat/restore.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/restore.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/restore/
+```
+
+The definition of the arable layer from Section 5 of [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is:
+
+| In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land.
+
+```shark-run:layer-prep
+python3 ./prepare-layers/make_arable_map.py --current /data/habitat/current_raw.tif \
+    --crosswalk /data/crosswalk.csv \
+    --output /data/habitat/arable.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/arable.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/arable/
+```
+
+### Generate area map
+
+For LIFE we need to know the actual area of each pixel, not just the pixel count. For this we generate a one-pixel-wide map containing the area per pixel at each latitude, and then sweep that column across all longitudes.
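+
+The area of a square WGS84 pixel depends only on its latitude, which is why a single one-pixel-wide column is enough. As a rough guide to what `prepare-layers/make_area_map.py` (added later in this change) computes for each latitude band, the following sketch gives the area in square metres of a pixel of side `pixel_size` degrees centred at `center_lat`:
+
+```python
+import math
+
+def pixel_area_m2(pixel_size: float, center_lat: float) -> float:
+    # WGS84 spheroid parameters
+    a = 6378137.0      # semi-major axis in metres
+    b = 6356752.3142   # semi-minor axis in metres
+    e = math.sqrt(1 - (b / a) ** 2)
+    band_areas = []
+    for lat in (center_lat + pixel_size / 2, center_lat - pixel_size / 2):
+        zm = 1 - e * math.sin(math.radians(lat))
+        zp = 1 + e * math.sin(math.radians(lat))
+        band_areas.append(
+            math.pi * b ** 2 * (math.log(zp / zm) / (2 * e) + math.sin(math.radians(lat)) / (zp * zm))
+        )
+    return pixel_size / 360.0 * (band_areas[0] - band_areas[1])
+
+# A 0.016666666666667 degree pixel at the equator is roughly 3.4 million m^2
+print(pixel_area_m2(0.016666666666667, 0.0))
+```
+
+The area raster itself is generated with: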
+
+```shark-run:layer-prep
+python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output /data/area-per-pixel.tif
+```
+
+### Difference maps
+
+The algorithm we use needs to account for map projection distortions, so all values in the AoH rasters are based on the area per pixel. To get the final extinction risk values we must remove that scaling, and to do that we generate a map of the area difference from current for each scenario.
+
+```shark-run:layer-prep
+python3 ./prepare-layers/make_diff_map.py --current /data/habitat/current_raw.tif \
+    --scenario /data/habitat/restore.tif \
+    --area /data/area-per-pixel.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat/restore_diff_area.tif
+```
+
+```shark-run:layer-prep
+python3 ./prepare-layers/make_diff_map.py --current /data/habitat/current_raw.tif \
+    --scenario /data/habitat/arable.tif \
+    --area /data/area-per-pixel.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat/arable_diff_area.tif
+```
+
+### Fetching the elevation map
+
+To assist with provenance, we download the data from the Zenodo ID.
+
+```shark-run:reclaimer
+reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output /data/elevation.tif
+```
+
+As with the habitat maps we need to resample to 1km, but rather than taking the mean elevation we select both the minimum and maximum elevation for each pixel, and then check whether the species' elevation range overlaps that interval when we calculate the AoH.
+
+```shark-run:gdalonly
+gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 /data/elevation.tif /data/elevation-min-1k.tif
+gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 /data/elevation.tif /data/elevation-max-1k.tif
+```
+
+## Calculating AoH
+
+Once all the data has been collected, we can calculate the AoH maps.
+
+### Get per species range data
+
+Rather than calculating from the PostGIS database directly, we first split out the data into a single GeoJSON file per species per season:
+
+```shark-run:postgis
+export DB_HOST=somehost
+export DB_USER=username
+export DB_PASSWORD=secretpassword
+export DB_NAME=iucnredlist
+
+python3 ./prepare-species/extract_species_psql.py --class %{TAXA} --output /data/species-info/%{TAXA}/ --projection "EPSG:4326"
+```
+
+The reason for doing this is primarily pipeline optimisation, though it also makes debugging and provenance tracing much easier. Most build systems, including the one we use, can notice when files have been updated and only redo the work that depends on them. If we have many thousands of species on the Red List and only a few are updated, then basing our calculation on a single file containing all species would force us to recalculate every result. With this step added, we regenerate the per-species, per-season GeoJSON files, which is cheap, and can then spot that most of them have not changed, so we do not need to recalculate the rasters for those species in the next stage.
+
+### Calculate AoH
+
+This step generates a single AoH raster for a single one of the above GeoJSON files.
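+
+Conceptually, for every pixel the AoH calculation keeps that pixel's area if its habitat class is suitable for the species, its elevation band overlaps the species' elevation limits, and it falls within the species' range polygon. The snippet below is a minimal numpy sketch of that test; the real implementation lives in `aoh-calculator/aohcalc.py` (a submodule not shown in this change), so the function and argument names here are illustrative only:
+
+```python
+import numpy as np
+
+def aoh_for_chunk(habitat, elevation_min, elevation_max, pixel_area, in_range,
+                  suitable_codes, species_lower, species_upper):
+    # habitat: integer habitat codes; pixel_area: area per pixel in m^2
+    # in_range: boolean mask rasterised from the species range polygon
+    suitable = np.isin(habitat, suitable_codes)
+    in_band = (elevation_max >= species_lower) & (elevation_min <= species_upper)
+    return float(np.sum(pixel_area * (suitable & in_band & in_range)))
+```
+
+The pipeline runs this calculation for each scenario, and against the PNV map for the historic baseline: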
+
+```shark-run:aohbuilder
+python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/current/ \
+    --elevation-max /data/elevation-max-1k.tif \
+    --elevation-min /data/elevation-min-1k.tif \
+    --area /data/area-per-pixel.tif \
+    --crosswalk /data/crosswalk.csv \
+    --speciesdata /data/species-info/%{TAXA}/current/* \
+    --output /data/aohs/current/%{TAXA}/
+
+python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/restore/ \
+    --elevation-max /data/elevation-max-1k.tif \
+    --elevation-min /data/elevation-min-1k.tif \
+    --area /data/area-per-pixel.tif \
+    --crosswalk /data/crosswalk.csv \
+    --speciesdata /data/species-info/%{TAXA}/current/* \
+    --output /data/aohs/restore/%{TAXA}/
+
+python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/arable/ \
+    --elevation-max /data/elevation-max-1k.tif \
+    --elevation-min /data/elevation-min-1k.tif \
+    --area /data/area-per-pixel.tif \
+    --crosswalk /data/crosswalk.csv \
+    --speciesdata /data/species-info/%{TAXA}/current/* \
+    --output /data/aohs/arable/%{TAXA}/
+
+python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/pnv/ \
+    --elevation-max /data/elevation-max-1k.tif \
+    --elevation-min /data/elevation-min-1k.tif \
+    --area /data/area-per-pixel.tif \
+    --crosswalk /data/crosswalk.csv \
+    --speciesdata /data/species-info/%{TAXA}/historic/* \
+    --output /data/aohs/pnv/%{TAXA}/
+```
+
+The resulting AoH rasters will all be in:
+
+```shark-publish2
+/data/aohs/current/
+/data/aohs/restore/
+/data/aohs/arable/
+/data/aohs/pnv/
+```
+
+## Calculating persistence maps
+
+```shark-build:deltap
+((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5)
+ (run (network host) (shell "apt-get update -qqy && apt-get -y install python3-pip libpq-dev git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*"))
+ (run (network host) (shell "pip install --upgrade pip"))
+ (run (network host) (shell "pip install 'numpy<2'"))
+ (run (network host) (shell "pip install gdal[numpy]==3.8.5"))
+ (run (shell "mkdir -p /root"))
+ (workdir "/root")
+ (copy (src "requirements.txt") (dst "./"))
+ (copy (src "aoh-calculator") (dst "./"))
+ (run (network host) (shell "pip install --no-cache-dir -r requirements.txt"))
+ (copy (src "deltap") (dst "./"))
+ (copy (src "utils") (dst "./"))
+)
+```
+
+For each species we use the AoH data to calculate the likelihood of extinction under two scenarios: restoration and conservation. To do that we work out the delta_p value for each species, and then sum those per-species results into a single layer.
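+
+Following Eyres et al, a species' persistence is taken here to be the proportion of its original (PNV) AoH that remains, raised to the power z, where z is the CURVE value from the front matter (0.25); delta_p is then the difference in persistence between a scenario and the current state. The sketch below shows that species-level relationship under those assumptions; the actual script, `deltap/global_code_residents_pixel.py`, evaluates this per pixel and also handles seasonality, which is omitted here:
+
+```python
+def persistence(aoh: float, historic_aoh: float, z: float = 0.25) -> float:
+    # Assumed form: fraction of historic AoH remaining, raised to the exponent z
+    return (aoh / historic_aoh) ** z
+
+def delta_p(current_aoh: float, scenario_aoh: float, historic_aoh: float, z: float = 0.25) -> float:
+    # Positive when the scenario leaves the species with more habitat than today
+    return persistence(scenario_aoh, historic_aoh, z) - persistence(current_aoh, historic_aoh, z)
+
+# e.g. a species that has lost half its habitat and would recover to 75% of it under restoration
+print(delta_p(current_aoh=0.5, scenario_aoh=0.75, historic_aoh=1.0))  # roughly +0.09
+```
+
+The per-species delta_p rasters and their per-taxon sums are generated with: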
+ + +```shark-run:deltap +python3 ./deltap/global_code_residents_pixel.py --speciesdata /data/species-info/%{TAXA}/current/* \ + --current_path /data/aohs/current/%{TAXA}/ \ + --scenario_path /data/aohs/restore/%{TAXA}/ \ + --historic_path /data/aohs/pnv/%{TAXA}/ \ + --z %{CURVE} \ + --output_path /data/deltap/restore/%{CURVE}/%{TAXA}/ + +python3 ./utils/raster_sum.py --rasters_directory /data/deltap/restore/%{CURVE}/%{TAXA}/ --output /data/deltap_sum/restore/%{CURVE}/%{TAXA}.tif + +python3 ./deltap/global_code_residents_pixel --speciesdata /data/species-info/%{TAXA}/current/* \ + --current_path /data/aohs/current/%{TAXA}/ \ + --scenario_path /data/aohs/arable/%{TAXA}/ \ + --historic_path /data/aohs/pnv/%{TAXA}/ \ + --z %{CURVE} \ + --output_path /data/deltap/arable/%{CURVE}/%{TAXA}/ + +python3 ./utils/raster_sum.py --rasters_directory /data/deltap/arable/%{CURVE}/%{TAXA}/ --output /data/deltap_sum/arable/%{CURVE}/%{TAXA}.tif +``` + +```shark-publish2 +/data/deltap/restore/ +/data/deltap/arable/ +``` + +Finally, we need to scale the results for publication: + +```shark-run:deltap +python3 ./deltap/delta_p_scaled_area.py --input /data/deltap_sum/restore/%{CURVE}/ \ + --diffmap /data/habitat/restore_diff_area.tif \ + --output /data/deltap_final/scaled_restore_%{CURVE}.tif + +python3 ./deltap/delta_p_scaled_area.py --input /data/deltap_sum/arable/%{CURVE}/ \ + --diffmap /data/habitat/arable_diff_area.tif \ + --output /data/deltap_final/scaled_arable_%{CURVE}.tif +``` + +```shark-publish +/data/deltap_final/scaled_restore_%{CURVE}.tif +/data/deltap_final/scaled_arable_%{CURVE}.tif +``` diff --git a/mollweide.py b/mollweide.py deleted file mode 100644 index c86a845..0000000 --- a/mollweide.py +++ /dev/null @@ -1,110 +0,0 @@ -import argparse -import os -import shutil -import tempfile - -import numpy as np -import pandas as pd -import h3 -from osgeo import gdal -from pyproj import CRS, Transformer - -TARGET_WIDTH=7267 -TARGET_HEIGHT=3385 - -EXTENT_MIN_X, EXTENT_MAX_X, EXTENT_MIN_Y, EXTENT_MAX_Y = -18027854.1353249, 18027101.8531421, -7965787.75894896, 8828766.53043604 # pylint: disable=C0301 - -PIXEL_SCALE_X = (EXTENT_MAX_X - EXTENT_MIN_X) / TARGET_WIDTH -PIXEL_SCALE_Y = (EXTENT_MAX_Y - EXTENT_MIN_Y) / TARGET_HEIGHT - -PROJ = """PROJCRS[\"Mollweide\",\n BASEGEOGCRS[\"WGS 84\",\n DATUM[\"D_unknown\",\n ELLIPSOID[\"WGS84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"Degree\",0.0174532925199433]]],\n CONVERSION[\"unnamed\",\n METHOD[\"Mollweide\"],\n PARAMETER[\"Longitude of natural origin\",0,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8802]],\n PARAMETER[\"False easting\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8806]],\n PARAMETER[\"False northing\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8807]]],\n CS[Cartesian,2],\n AXIS[\"(E)\",east,\n ORDER[1],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]],\n AXIS[\"(N)\",north,\n ORDER[2],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]]]""" # pylint: disable=C0301 - -def generate_mollweide( - tiles_csv_filename: str, - output_filename: str, -) -> None: - tiles_df = pd.read_csv(tiles_csv_filename) - - wgs85_crs = CRS.from_string("EPSG:4326") - mollweide_crs = CRS.from_string(PROJ) - transformer = Transformer.from_crs(wgs85_crs, mollweide_crs, always_xy=True) - - # work out the pixel scale - # x_scale = (transformer.transform(180, 0)[0] * 2.0) / TARGET_WIDTH - # y_scale = (transformer.transform(0, 90)[1] * 2.0) / TARGET_HEIGHT - x_scale = PIXEL_SCALE_X - y_scale = 
PIXEL_SCALE_Y - print(f"pixel scale: {x_scale}, {y_scale}") - - raw = np.zeros((TARGET_HEIGHT, TARGET_WIDTH)).tolist() - - with tempfile.TemporaryDirectory() as tempdir: - tempname = os.path.join(tempdir, "result.tif") - output_dataset = gdal.GetDriverByName("gtiff").Create( - tempname, - TARGET_WIDTH, - TARGET_HEIGHT, - 1, - gdal.GDT_Float64, - ['COMPRESS=LZW'], - ) - output_dataset.SetProjection(PROJ) - output_dataset.SetGeoTransform(( - EXTENT_MIN_X, x_scale, 0.0, - EXTENT_MIN_Y, 0.0, y_scale - )) - band = output_dataset.GetRasterBand(1) - - for _, row in tiles_df.iterrows(): - tileid, area = row - try: - lat, lng = h3.cell_to_latlng(tileid) - except ValueError: - print(f"Failed to process {tileid}") - continue - x_mollweide, y_mollweide = transformer.transform(lng, lat) # pylint: disable=E0633 - x_mollweide -= EXTENT_MAX_X - y_mollweide -= EXTENT_MIN_Y - - xpos = round((x_mollweide / x_scale)) - ypos = round((y_mollweide / y_scale)) - val = raw[ypos][xpos] - if val == 0: - val = [area] - else: - val.append(area) - raw[ypos][xpos] = val - - # Now we need to average all the cells - for yoffset in range(TARGET_HEIGHT): - for xoffset in range(TARGET_WIDTH): - val = raw[yoffset][xoffset] - raw[yoffset][xoffset] = np.mean(val) - - band.WriteArray(np.array(raw), 0, 0) - del output_dataset - - shutil.move(tempname, output_filename) - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "--tiles", - type=str, - required=True, - dest="tiles_csv_filename", - help="CSV containing h3 tiles and values." - ) - parser.add_argument( - "--output", - type=str, - required=True, - dest="output_filename", - help="Filename for output GeoTIFF." - ) - args = parser.parse_args() - - generate_mollweide(args.tiles_csv_filename, args.output_filename) - -if __name__ == "__main__": - main() diff --git a/persistence/__init__.py b/persistence/__init__.py deleted file mode 100644 index 39fe890..0000000 --- a/persistence/__init__.py +++ /dev/null @@ -1,252 +0,0 @@ -# AoH calculator code for the 4C persistence calculator, a more specialised -# version of the logic from working on Daniele Baisero's AoH library. -# -# There's two seperate versions of the actual calculation - one for CPU use -# one for use with CUDA GPUs. Originally I wanted to have one function with -# conditional bits of code, but almost all the code ended up being conditional -# one way or the other, so the logic was hard to read. So instead we now have -# some duplication, but it is easier to see the logic in each one. - -import os -import shutil -import tempfile -from dataclasses import dataclass -from enum import Enum -from typing import List, Optional, Any, Tuple - -import numpy -from osgeo import gdal -try: - import cupy - import cupyx - USE_GPU = True -except ModuleNotFoundError: - USE_GPU = False - -from iucn_modlib.classes.Taxon import Taxon -from iucn_modlib.classes.HabitatFilters import HabitatFilters -import iucn_modlib.translator - -from yirgacheffe.layers import RasterLayer, VectorLayer, ConstantLayer, UniformAreaLayer, YirgacheffeLayer - -# When working with rasters we read larger chunks that just a single line, despite that usually -# being what GDAL recommends if you ask for the efficient block size for larger files. There's -# two reasons for this: -# 1: We use DynamicVectorRangeLayer to incrementally rasterize the vector habitat maps, so as to -# not need to hold the entire raster in memory at once. Doing that on a per line basis is -# somewhat slow. 
Thus the step is a tradeoff between memory allocation and CPU cost of -# processing the vectors. Moving from 1 line to 512 lines cut the runtime by close to half for -# the small sample I tested. -# 2: With the CUDA version of the calculator you have a cost of moving the data from main memory -# over to GPU memory and back. Again, doing so on a line by line basis is inefficient, and using -# a larger chunk size gives us better efficiency. -YSTEP = 512 - - -@dataclass -class LandModel: - habitat_map_filename: str - elevation_map_filename: str - area_map_filename: Optional[str] - translator: Any - - def new_habitat_layer(self) -> RasterLayer: - return RasterLayer.layer_from_file(self.habitat_map_filename) - - def new_elevation_layer(self) -> RasterLayer: - return RasterLayer.layer_from_file(self.elevation_map_filename) - - def new_area_layer(self) -> YirgacheffeLayer: - if self.area_map_filename is None: - return ConstantLayer(1.0) - try: - return UniformAreaLayer.layer_from_file(self.area_map_filename) - except ValueError: - return RasterLayer.layer_from_file(self.area_map_filename) - -class JungModel(LandModel): - def __init__(self, habitat_map_filename: str, elevation_map_filename: str, area_map_filename: Optional[str] = None): - super().__init__(habitat_map_filename, elevation_map_filename, area_map_filename, iucn_modlib.translator.toJung) - -class ESACCIModel(LandModel): - def __init__(self, habitat_map_filename: str, elevation_map_filename: str, area_map_filename: Optional[str] = None): - super().__init__(habitat_map_filename, elevation_map_filename, area_map_filename, - iucn_modlib.translator.toESACCI) - - -class Seasonality(Enum): - RESIDENT = "resident" - BREEDING = "breeding" - NONBREEDING = "nonbreeding" - - @property - def iucn_seasons(self) -> Tuple: - if self.value == 'resident': - return ('Resident', 'Seasonal Occurrence Unknown') - elif self.value == 'breeding': - return ('Resident', 'Breeding Season', 'Seasonal Occurrence Unknown') - elif self.value == 'nonbreeding': - return ('Resident', 'Non-Breeding Season', 'Seasonal Occurrence Unknown') - else: - raise NotImplementedError(f'Unhandled seasonlity value {self.value}') - - -def calculator( - species: Taxon, - range_path: str, - land_model: LandModel, - seasonality: Seasonality, - results_path: Optional[str] -) -> Tuple[float, Optional[str]]: - - # We do not re-use data in this, so set a small block cache size for GDAL, otherwise - # it pointlessly hogs memory, and then spends a long time tidying it up after. 
- gdal.SetCacheMax(1024 * 1024 * 16) - - habitat_params = iucn_modlib.HabitatFilters( - season = seasonality.iucn_seasons, - suitability = ('Suitable', 'Unknown'), - majorImportance = ('Yes', 'No'), - ) - habitat_list = land_model.translator(species.habitatCodes(habitat_params)) - - # These three map layers don't change across seasons - habitat_layer = land_model.new_habitat_layer() - elevation_layer = land_model.new_elevation_layer() - area_layer = land_model.new_area_layer() - - # range layer is only one that is seasonal, so recalculate - where_filter = f"id_no = {species.taxonid} and season in ('{seasonality.value}', 'resident')" - pixel_scale = habitat_layer.pixel_scale - assert pixel_scale - try: - range_layer = VectorLayer.layer_from_file(range_path, where_filter, pixel_scale, habitat_layer.projection) - except ValueError: - return 0.0, None - - # Work out the intersection of all the maps - layers = [habitat_layer, elevation_layer, area_layer, range_layer] - try: - intersection = YirgacheffeLayer.find_intersection(layers) - except ValueError: - for layer in layers: - print(f'Scale of {layer} is {layer.pixel_scale}') - raise - for layer in layers: - layer.set_window_for_intersection(intersection) - - with tempfile.TemporaryDirectory() as tempdir: - results_layer = None - results_dataset_filename = '' - if results_path: - results_dataset_filename = f'{seasonality}-{species.taxonid}.tif' - results_layer = RasterLayer.empty_raster_layer_like( - habitat_layer, - os.path.join(tempdir, results_dataset_filename), - datatype=gdal.GDT_Float32, - ) - - calculate_function = _calculate_cpu if not USE_GPU else _calculate_cuda - result = calculate_function( - range_layer, - habitat_layer, - habitat_list, - elevation_layer, - (species.elevation_lower, species.elevation_upper), - area_layer, - results_layer, - ) - # if we got here, then consider the experiment a success - if results_layer and results_path: - del results_layer # aka close for gdal - shutil.move(os.path.join(tempdir, results_dataset_filename), - os.path.join(results_path, results_dataset_filename)) - return result, results_dataset_filename - - -def _calculate_cpu( - range_layer: YirgacheffeLayer, - habitat_layer: YirgacheffeLayer, - habitat_list: List, - elevation_layer: YirgacheffeLayer, - elevation_range: Tuple[float, float], - area_layer: YirgacheffeLayer, - results_layer: Optional[YirgacheffeLayer] -) -> float: - filtered_habitat = habitat_layer.numpy_apply(lambda chunk: numpy.isin(chunk, habitat_list)) - filtered_elevation = elevation_layer.numpy_apply(lambda chunk: - numpy.logical_and(chunk >= min(elevation_range), chunk <= max(elevation_range))) - - # TODO: this isn't free - so if there's no nan's we'd like to avoid this stage - #cleaned_area = area_layer.numpy_apply(lambda chunk: numpy.nan_to_num(chunk, copy=False, nan=0.0)) - - data = filtered_habitat * filtered_elevation * area_layer * range_layer - if results_layer: - return data.save(results_layer, and_sum=True) - else: - return data.sum() - - -def _calculate_cuda( - range_layer: YirgacheffeLayer, - habitat_layer: YirgacheffeLayer, - habitat_list: List, - elevation_layer: YirgacheffeLayer, - elevation_range: Tuple[float, float], - area_layer: YirgacheffeLayer, - results_layer: Optional[YirgacheffeLayer] -) -> float: - - # all layers now have the same window width/height, so just take the habitat one - pixel_width = habitat_layer.window.xsize - pixel_height = habitat_layer.window.ysize - - aoh_shader = cupy.ElementwiseKernel( - 'bool habitat, int16 elevation, uint8 
species_range, float64 pixel_area', - 'float64 result', - 'result = (species_range && habitat && ' \ - f'((elevation >= {min(elevation_range)}) && (elevation <= {max(elevation_range)})));' \ - 'result = result * pixel_area', - 'my_shader' - ) - aoh_reduction_shader = cupy.ReductionKernel( - 'bool habitat, int16 elevation, uint8 species_range, float64 pixel_area', - 'float64 result', - f'(species_range && habitat && ((elevation >= {min(elevation_range)}) && ' \ - f'(elevation <= {max(elevation_range)}))) * pixel_area', - 'a + b', - 'result = a', - '0.0', - 'my_reduction_shader' - ) - - habitat_list = cupy.array(habitat_list) - - area_total = 0.0 - data = None - for yoffset in range(0, pixel_height, YSTEP): - this_step = YSTEP - if yoffset + this_step > pixel_height: - this_step = pixel_height - yoffset - - habitat, elevation, species_range, pixel_areas = [ - cupy.array(x.read_array(0, yoffset, pixel_width, this_step)) - for x in [habitat_layer, elevation_layer, range_layer, area_layer] - ] - - filtered_habitat = cupy.isin(habitat, habitat_list) - - # if we don't need to store out the geotiff then we can do - # the summation and sum in a single reduction shader. Otherwise we need to - # calc to an area and then reduce, which is slower but is the price of - # getting the intermediary data - if not results_layer: - area_total += aoh_reduction_shader(filtered_habitat, elevation, species_range, pixel_areas) - else: - if data is None or data.shape != filtered_habitat.shape: - data = cupy.zeros(filtered_habitat.shape, cupy.float64) - aoh_shader(filtered_habitat, elevation, species_range, pixel_areas, data) - area_total += cupy.sum(data) - results_layer._dataset.GetRasterBand(1).WriteArray(data.get(), 0, yoffset) # pylint: disable=W0212 - - return area_total diff --git a/predictors/endemism.py b/predictors/endemism.py new file mode 100644 index 0000000..88a1ce1 --- /dev/null +++ b/predictors/endemism.py @@ -0,0 +1,272 @@ +# Endemism is the geometric mean of the proportion of how much each cell contributes to a species total AoH. 
+# Uses the trick from https://stackoverflow.com/questions/43099542/python-easy-way-to-do-geometric-mean-in-python +# for calculating the geometric mean with less risk of overflow + +import argparse +import os +import sys +import tempfile +import time +from glob import glob +from multiprocessing import Manager, Process, Queue, cpu_count + +import numpy as np +from osgeo import gdal +from yirgacheffe.layers import RasterLayer + +def stage_1_worker( + filename: str, + result_dir: str, + input_queue: Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + raster_paths = input_queue.get() + if raster_paths is None: + break + + rasters = [RasterLayer.layer_from_file(x) for x in raster_paths] + + match len(rasters): + case 2: + union = RasterLayer.find_union(rasters) + for r in rasters: + r.set_window_for_union(union) + + aoh1 = rasters[0].sum() + if aoh1 > 0.0: + season1 = rasters[0].numpy_apply( + lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh1)) + ) + else: + season1 = None + aoh2 = rasters[1].sum() + if aoh2 > 0.0: + season2 = rasters[1].numpy_apply( + lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh2)) + ) + else: + season2 = None + + match season1, season2: + case None, None: + continue + case a, None: + combined = a + case None, b: + combined = b + case _, _: + combined = season1.numpy_apply(lambda a, b: np.where(a > b, a, b), season2) + + partial = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Float64) + combined.save(partial) + case 1: + aoh = rasters[0].sum() + if aoh > 0.0: + partial = rasters[0].numpy_apply( + lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh)) + ) + else: + continue + case _: + raise ValueError("too many seasons") + + if merged_result is None: + if len(rasters) > 1: + merged_result = partial + else: + merged_result = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Float64) + partial.save(merged_result) + else: + merged_result.reset_window() + if len(rasters) > 1: + union = RasterLayer.find_union([merged_result, partial]) + partial.set_window_for_union(union) + else: + union = RasterLayer.find_union([merged_result, rasters[0]]) + rasters[0].set_window_for_union(union) + merged_result.set_window_for_union(union) + + merged = partial + merged_result + temp = RasterLayer.empty_raster_layer_like(merged_result) + merged.save(temp) + merged_result = temp + + if merged_result is not None: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def stage_2_worker( + filename: str, + result_dir: str, + input_queue: Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + path = input_queue.get() + if path is None: + break + + with RasterLayer.layer_from_file(path) as partial_raster: + if merged_result is None: + merged_result = RasterLayer.empty_raster_layer_like(partial_raster) + cleaned_raster = partial_raster.numpy_apply( + lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0) + ) + cleaned_raster.save(merged_result) + else: + merged_result.reset_window() + + union = RasterLayer.find_union([merged_result, partial_raster]) + merged_result.set_window_for_union(union) + partial_raster.set_window_for_union(union) + + calc = merged_result + partial_raster + temp = RasterLayer.empty_raster_layer_like(merged_result) + calc.save(temp) + merged_result = temp + + if merged_result: + final = 
RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def endemism( + aohs_dir: str, + species_richness_path: str, + output_path: str, + processes_count: int +) -> None: + output_dir, _ = os.path.split(output_path) + os.makedirs(output_dir, exist_ok=True) + + aohs = glob("**/*.tif", root_dir=aohs_dir) + print(f"We fould {len(aohs)} AoH rasters") + + species_rasters = {} + for raster_path in aohs: + speciesid = os.path.splitext(os.path.basename(raster_path))[0] + full_path = os.path.join(aohs_dir, raster_path) + try: + species_rasters[speciesid].add(full_path) + except KeyError: + species_rasters[speciesid] = set([full_path]) + + with tempfile.TemporaryDirectory() as tempdir: + with Manager() as manager: + source_queue = manager.Queue() + + workers = [Process(target=stage_1_worker, args=( + f"{index}.tif", + tempdir, + source_queue + )) for index in range(processes_count)] + for worker_process in workers: + worker_process.start() + + for raster in species_rasters.items(): + source_queue.put(raster) + for _ in range(len(workers)): + source_queue.put(None) + + processes = workers + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + + # here we should have now a set of images in tempdir to merge + single_worker = Process(target=stage_2_worker, args=( + "summed_proportion.tif", + output_dir, + source_queue + )) + single_worker.start() + nextfiles = [os.path.join(tempdir, x) for x in glob("*.tif", root_dir=tempdir)] + for file in nextfiles: + source_queue.put(file) + source_queue.put(None) + + processes = [single_worker] + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + + with RasterLayer.layer_from_file(species_richness_path) as species_richness: + with RasterLayer.layer_from_file(os.path.join(output_dir, "summed_proportion.tif")) as summed_proportion: + + intersection = RasterLayer.find_intersection([summed_proportion, species_richness]) + summed_proportion.set_window_for_intersection(intersection) + species_richness.set_window_for_intersection(intersection) + + cleaned_species_richness = species_richness.numpy_apply(lambda a: np.where(a > 0, a, np.nan)) + + with RasterLayer.empty_raster_layer_like( + summed_proportion, + filename=output_path, + nodata=np.nan + ) as result: + calc = summed_proportion.numpy_apply(lambda a, b: np.exp(a / b), cleaned_species_richness) + calc.parallel_save(result) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Calculate species richness") + parser.add_argument( + "--aohs_folder", + type=str, + required=True, + dest="aohs", + help="Folder containing set of AoHs" + ) + parser.add_argument( + "--species_richness", + type=str, + required=True, + dest="species_richness", + help="GeoTIFF containing species richness" + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output", + help="Destination GeoTIFF file for results." + ) + parser.add_argument( + "-j", + type=int, + required=False, + default=round(cpu_count() / 2), + dest="processes_count", + help="Number of concurrent threads to use." 
+ ) + args = parser.parse_args() + + endemism( + args.aohs, + args.species_richness, + args.output, + args.processes_count + ) + +if __name__ == "__main__": + main() diff --git a/predictors/species_richness.py b/predictors/species_richness.py new file mode 100644 index 0000000..1c2e395 --- /dev/null +++ b/predictors/species_richness.py @@ -0,0 +1,211 @@ +import argparse +import os +import sys +import tempfile +import time +from glob import glob +from multiprocessing import Manager, Process, Queue, cpu_count + +import numpy as np +from osgeo import gdal +from yirgacheffe.layers import RasterLayer + +def stage_1_worker( + filename: str, + result_dir: str, + input_queue: Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + raster_paths = input_queue.get() + if raster_paths is None: + break + + rasters = [RasterLayer.layer_from_file(x) for x in raster_paths] + + if len(rasters) > 1: + union = RasterLayer.find_union(rasters) + for r in rasters: + r.set_window_for_union(union) + calc = rasters[0].numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) + for r in rasters[:1]: + calc = calc | r.numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) + + partial = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Int16) + calc.save(partial) + else: + partial = rasters[0].numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) + + if merged_result is None: + if len(rasters) > 1: + merged_result = partial + else: + merged_result = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Int16) + partial.save(merged_result) + else: + merged_result.reset_window() + if len(rasters) > 1: + union = RasterLayer.find_union([merged_result, partial]) + partial.set_window_for_union(union) + else: + union = RasterLayer.find_union([merged_result, rasters[0]]) + rasters[0].set_window_for_union(union) + merged_result.set_window_for_union(union) + + + merged = partial + merged_result + temp = RasterLayer.empty_raster_layer_like(merged_result) + merged.save(temp) + merged_result = temp + + if merged_result is not None: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def stage_2_worker( + filename: str, + result_dir: str, + input_queue: Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + path = input_queue.get() + if path is None: + break + + with RasterLayer.layer_from_file(path) as partial_raster: + if merged_result is None: + merged_result = RasterLayer.empty_raster_layer_like(partial_raster) + cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + cleaned_raster.save(merged_result) + else: + merged_result.reset_window() + + union = RasterLayer.find_union([merged_result, partial_raster]) + merged_result.set_window_for_union(union) + partial_raster.set_window_for_union(union) + + calc = merged_result + (partial_raster.numpy_apply( + lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + ) + temp = RasterLayer.empty_raster_layer_like(merged_result) + calc.save(temp) + merged_result = temp + + if merged_result: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def species_richness( + aohs_dir: str, + output_path: str, + processes_count: int +) -> None: + output_dir, filename = os.path.split(output_path) + os.makedirs(output_dir, exist_ok=True) + + aohs = glob("**/*.tif", root_dir=aohs_dir) + 
print(f"We fould {len(aohs)} AoH rasters") + + species_rasters = {} + for raster_path in aohs: + speciesid = os.path.splitext(os.path.basename(raster_path))[0] + full_path = os.path.join(aohs_dir, raster_path) + try: + species_rasters[speciesid].add(full_path) + except KeyError: + species_rasters[speciesid] = set([full_path]) + + with tempfile.TemporaryDirectory() as tempdir: + with Manager() as manager: + source_queue = manager.Queue() + + workers = [Process(target=stage_1_worker, args=( + f"{index}.tif", + tempdir, + source_queue + )) for index in range(processes_count)] + for worker_process in workers: + worker_process.start() + + for raster in species_rasters.items(): + source_queue.put(raster) + for _ in range(len(workers)): + source_queue.put(None) + + processes = workers + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + + # here we should have now a set of images in tempdir to merge + single_worker = Process(target=stage_2_worker, args=( + filename, + output_dir, + source_queue + )) + single_worker.start() + nextfiles = [os.path.join(tempdir, x) for x in glob("*.tif", root_dir=tempdir)] + for file in nextfiles: + source_queue.put(file) + source_queue.put(None) + + processes = [single_worker] + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + +def main() -> None: + parser = argparse.ArgumentParser(description="Calculate species richness") + parser.add_argument( + "--aohs_folder", + type=str, + required=True, + dest="aohs", + help="Folder containing set of AoHs" + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output", + help="Destination GeoTIFF file for results." + ) + parser.add_argument( + "-j", + type=int, + required=False, + default=round(cpu_count() / 2), + dest="processes_count", + help="Number of concurrent threads to use." 
+ ) + args = parser.parse_args() + + species_richness( + args.aohs, + args.output, + args.processes_count + ) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/generate_crosswalk.py b/prepare-layers/generate_crosswalk.py new file mode 100644 index 0000000..e9fcfd9 --- /dev/null +++ b/prepare-layers/generate_crosswalk.py @@ -0,0 +1,64 @@ +import argparse +import os + +import pandas as pd +from iucn_modlib.translator import toJung + + +# Take from https://www.iucnredlist.org/resources/habitat-classification-scheme +IUCN_HABITAT_CODES = [ + "1", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9", + "2", "2.1", "2.2", + "3", "3.1", "3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", + "4", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7", + "5", "5.1", "5.2", "5.3", "5.4", "5.5", "5.6", "5.7", "5.8", "5.9", + "5.10", "5.11", "5.12", "5.13", "5.14", "5.15", "5.16", "5.17", "5.18", + "6", + "7", "7.1", "7.2", + "8", "8.1", "8.2", "8.3", + "9", "9.1", "9.2", "9.3", "9.4", "9.5", "9.6", "9.7", "9.8", "9.9", "9.10", + "9.8.1", "9.8.2", "9.8.3", "9.8.4", "9.8.5", "9.8.6", + "10", "10.1", "10.2", "10.3", "10.4", + "11", "11.1", "11.1.1", "11.1.2", "11.2", "11.3", "11.4", "11.5", "11.6", + "12", "12.1", "12.2", "12.3", "12.4", "12.5", "12.6", "12.7", + "13", "13.1", "13.2", "13.3", "13.4", "13.5", + "14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6", + "15", "15.1", "15.2", "15.3", "15.4", "15.5", "15.6", "15.7", "15.8", + "15.9", "15.10", "15.11", "15.12", "15.13", + "16", + "17", + "18", +] + +def generate_crosswalk( + output_filename: str, +) -> None: + output_dir, _ = os.path.split(output_filename) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + res = [] + for iucn_code in IUCN_HABITAT_CODES: + try: + for jung_code in toJung([iucn_code]): + res.append([iucn_code, jung_code]) + except ValueError: + continue + + df = pd.DataFrame(res, columns=["code", "value"]) + df.to_csv(output_filename, index=False) + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate a Jung crosswalk table as CSV.") + parser.add_argument( + '--output', + type=str, + help='Path where final crosswalk should be stored', + required=True, + dest='output_filename', + ) + args = parser.parse_args() + generate_crosswalk(args.output_filename) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/make_arable_map.py b/prepare-layers/make_arable_map.py new file mode 100644 index 0000000..7236463 --- /dev/null +++ b/prepare-layers/make_arable_map.py @@ -0,0 +1,76 @@ +import argparse +from typing import Optional + +import numpy as np +from alive_progress import alive_bar +from yirgacheffe.layers import RasterLayer + +JUNG_ARABLE_CODE = 1401 +JUNG_URBAN_CODE = 1405 + +def make_arable_map( + current_path: str, + output_path: str, + concurrency: Optional[int], + show_progress: bool, +) -> None: + with RasterLayer.layer_from_file(current_path) as current: + + calc = current.numpy_apply( + lambda a: np.where(a != JUNG_URBAN_CODE, JUNG_ARABLE_CODE, a) + ) + + with RasterLayer.empty_raster_layer_like( + current, + filename=output_path, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate the arable scenario map.") + parser.add_argument( + '--current', + type=str, + help='Path of Jung L2 map', + required=True, + dest='current_path', 
+ ) + parser.add_argument( + '--output', + type=str, + help='Path where final map should be stored', + required=True, + dest='results_path', + ) + parser.add_argument( + '-j', + type=int, + help='Number of concurrent threads to use for calculation.', + required=False, + default=None, + dest='concurrency', + ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) + args = parser.parse_args() + + make_arable_map( + args.current_path, + args.results_path, + args.concurrency, + args.show_progress, + ) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/make_area_map.py b/prepare-layers/make_area_map.py new file mode 100644 index 0000000..e6dba2a --- /dev/null +++ b/prepare-layers/make_area_map.py @@ -0,0 +1,90 @@ +import argparse +import math + +import numpy as np +from osgeo import gdal +from yirgacheffe.window import Area, PixelScale +from yirgacheffe.layers import RasterLayer + +# Taken from https://gis.stackexchange.com/questions/127165/more-accurate-way-to-calculate-area-of-rasters +def area_of_pixel(pixel_size, center_lat): + """Calculate m^2 area of a wgs84 square pixel. + + Adapted from: https://gis.stackexchange.com/a/127327/2397 + + Parameters: + pixel_size (float): length of side of pixel in degrees. + center_lat (float): latitude of the center of the pixel. Note this + value +/- half the `pixel-size` must not exceed 90/-90 degrees + latitude or an invalid area will be calculated. + + Returns: + Area of square pixel of side length `pixel_size` centered at + `center_lat` in m^2. + + """ + a = 6378137 # meters + b = 6356752.3142 # meters + e = math.sqrt(1 - (b/a)**2) + area_list = [] + for f in [center_lat+pixel_size/2, center_lat-pixel_size/2]: + zm = 1 - e*math.sin(math.radians(f)) + zp = 1 + e*math.sin(math.radians(f)) + area_list.append( + math.pi * b**2 * ( + math.log(zp/zm) / (2*e) + + math.sin(math.radians(f)) / (zp*zm))) + return pixel_size / 360. * (area_list[0] - area_list[1]) + +def make_area_map( + pixel_scale: float, + output_path: str +) -> None: + pixels = [0,] * math.floor(90.0 / pixel_scale) + for i in range(len(pixels)): # pylint: disable=C0200 + y = (i + 0.5) * pixel_scale + area = area_of_pixel(pixel_scale, y) + pixels[i] = area + + allpixels = np.rot90(np.array([list(reversed(pixels)) + pixels])) + + area = Area( + left=math.floor(180 / pixel_scale) * pixel_scale * -1.0, + right=((math.floor(180 / pixel_scale) - 1) * pixel_scale * -1.0), + top=(math.floor(90 / pixel_scale) * pixel_scale), + bottom=(math.floor(90 / pixel_scale) * pixel_scale * -1.0) + ) + with RasterLayer.empty_raster_layer( + area, + PixelScale(pixel_scale, pixel_scale * -1.0), + gdal.GDT_Float32, + filename=output_path + ) as res: + res._dataset.WriteArray(allpixels, 0, 0) # pylint: disable=W0212 + + +def main() -> None: + parser = argparse.ArgumentParser(description="Downsample habitat map to raster per terrain type.") + parser.add_argument( + "--scale", + type=float, + required=True, + dest="pixel_scale", + help="Output pixel scale value." + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output_path", + help="Destination file for area raster." 
+ ) + args = parser.parse_args() + + make_area_map( + args.pixel_scale, + args.output_path + ) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/make_current_map.py b/prepare-layers/make_current_map.py new file mode 100644 index 0000000..cf53f3a --- /dev/null +++ b/prepare-layers/make_current_map.py @@ -0,0 +1,110 @@ +import argparse +import itertools +from typing import Dict, Optional +from multiprocessing import set_start_method + +import pandas as pd +from alive_progress import alive_bar +from yirgacheffe.layers import RasterLayer + +# From Eyres et al: The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped +# only at IUCN level 1, so to estimate species’ proportion of original AOH now remaining we could only use natural +# habitats mapped at level 1 and artificial habitats at level 2. +IUCN_CODE_ARTIFICAL = [ + "14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6" +] + +def load_crosswalk_table(table_file_name: str) -> Dict[str,int]: + rawdata = pd.read_csv(table_file_name) + result = {} + for _, row in rawdata.iterrows(): + try: + result[row.code].append(int(row.value)) + except KeyError: + result[row.code] = [int(row.value)] + return result + + +def make_current_map( + current_path: str, + crosswalk_path: str, + output_path: str, + concurrency: Optional[int], + show_progress: bool, +) -> None: + with RasterLayer.layer_from_file(current_path) as current: + crosswalk = load_crosswalk_table(crosswalk_path) + + map_preserve_code = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_ARTIFICAL])) + + def filter_data(a): + import numpy as np # pylint: disable=C0415 + return np.where(np.isin(a, map_preserve_code), a, (np.floor(a / 100) * 100).astype(int)) + + calc = current.numpy_apply(filter_data) + + with RasterLayer.empty_raster_layer_like( + current, + filename=output_path, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + +def main() -> None: + set_start_method("spawn") + + parser = argparse.ArgumentParser(description="Zenodo resource downloader.") + parser.add_argument( + '--jung_l2', + type=str, + help='Path of Jung L2 map', + required=True, + dest='current_path', + ) + parser.add_argument( + '--crosswalk', + type=str, + help='Path of map to IUCN crosswalk table', + required=True, + dest='crosswalk_path', + ) + parser.add_argument( + '--output', + type=str, + help='Path where final map should be stored', + required=True, + dest='results_path', + ) + parser.add_argument( + '-j', + type=int, + help='Number of concurrent threads to use for calculation.', + required=False, + default=None, + dest='concurrency', + ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) + args = parser.parse_args() + + make_current_map( + args.current_path, + args.crosswalk_path, + args.results_path, + args.concurrency, + args.show_progress, + ) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/make_diff_map.py b/prepare-layers/make_diff_map.py new file mode 100644 index 0000000..26d8066 --- /dev/null +++ b/prepare-layers/make_diff_map.py @@ -0,0 +1,157 @@ +import argparse +import os +import shutil +import tempfile +from typing import Optional + +from osgeo import gdal +from alive_progress import alive_bar +from yirgacheffe.layers import RasterLayer, 
UniformAreaLayer + +def make_diff_map( + current_path: str, + scenario_path: str, + area_path: str, + pixel_scale: float, + target_projection: Optional[str], + output_path: str, + concurrency: Optional[int], + show_progress: bool, +) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + raw_map_filename = os.path.join(tmpdir, "raw.tif") + with RasterLayer.layer_from_file(current_path) as current: + with RasterLayer.layer_from_file(scenario_path) as scenario: + + layers = [current, scenario] + intersection = RasterLayer.find_intersection(layers) + for layer in layers: + layer.set_window_for_intersection(intersection) + + calc = current.numpy_apply(lambda a, b: a != b, scenario) + + with RasterLayer.empty_raster_layer_like( + current, + filename=raw_map_filename, + datatype=gdal.GDT_Float32, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + rescaled_map_filename = os.path.join(tmpdir, "rescaled.tif") + gdal.Warp(rescaled_map_filename, raw_map_filename, options=gdal.WarpOptions( + creationOptions=['COMPRESS=LZW', 'NUM_THREADS=16'], + multithread=True, + dstSRS=target_projection, + outputType=gdal.GDT_Float32, + xRes=pixel_scale, + yRes=0.0 - pixel_scale, + resampleAlg="average", + workingType=gdal.GDT_Float32 + )) + + with UniformAreaLayer.layer_from_file(area_path) as area_map: + with RasterLayer.layer_from_file(rescaled_map_filename) as diff_map: + layers = [area_map, diff_map] + intersection = RasterLayer.find_intersection(layers) + for layer in layers: + layer.set_window_for_intersection(intersection) + + area_adjusted_map_filename = os.path.join(tmpdir, "final.tif") + calc = area_map * diff_map + + with RasterLayer.empty_raster_layer_like( + diff_map, + filename=area_adjusted_map_filename, + datatype=gdal.GDT_Float32, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + shutil.move(area_adjusted_map_filename, output_path) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate an area difference map.") + parser.add_argument( + '--current', + type=str, + help='Path of Jung L2 map', + required=True, + dest='current_path', + ) + parser.add_argument( + '--scenario', + type=str, + help='Path of the scenario map', + required=True, + dest='scenario_path', + ) + parser.add_argument( + '--area', + type=str, + help='Path of the area per pixel map', + required=True, + dest='area_path', + ) + parser.add_argument( + "--scale", + type=float, + required=True, + dest="pixel_scale", + help="Output pixel scale value." 
+ ) + parser.add_argument( + '--projection', + type=str, + help="Target projection", + required=False, + dest="target_projection", + default=None + ) + parser.add_argument( + '--output', + type=str, + help='Path where final map should be stored', + required=True, + dest='results_path', + ) + parser.add_argument( + '-j', + type=int, + help='Number of concurrent threads to use for calculation.', + required=False, + default=None, + dest='concurrency', + ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) + args = parser.parse_args() + + make_diff_map( + args.current_path, + args.scenario_path, + args.area_path, + args.pixel_scale, + args.target_projection, + args.results_path, + args.concurrency, + args.show_progress, + ) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/make_restore_map.py b/prepare-layers/make_restore_map.py new file mode 100644 index 0000000..0fadf75 --- /dev/null +++ b/prepare-layers/make_restore_map.py @@ -0,0 +1,128 @@ +import argparse +import itertools +import sys +from typing import Dict, Optional + +import numpy as np +import pandas as pd +from alive_progress import alive_bar +from yirgacheffe.layers import RasterLayer, RescaledRasterLayer + +# From Eyres et al: In the restoration scenario all areas classified as arable or pasture were restored to their PNV +IUCN_CODE_REPLACEMENTS = [ + "14.1", + "14.2" +] + +def load_crosswalk_table(table_file_name: str) -> Dict[str,int]: + rawdata = pd.read_csv(table_file_name) + result = {} + for _, row in rawdata.iterrows(): + try: + result[row.code].append(int(row.value)) + except KeyError: + result[row.code] = [int(row.value)] + return result + + +def make_restore_map( + pnv_path: str, + current_path: str, + crosswalk_path: str, + output_path: str, + concurrency: Optional[int], + show_progress: bool, +) -> None: + with RasterLayer.layer_from_file(current_path) as current: + with RescaledRasterLayer.layer_from_file(pnv_path, current.pixel_scale) as pnv: + crosswalk = load_crosswalk_table(crosswalk_path) + + map_replacement_codes = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS])) + + try: + intersection = RasterLayer.find_intersection([pnv, current]) + except ValueError: + print("Layers do not match in pixel scale or projection:\n", file=sys.stderr) + print(f"\t{pnv_path}: {pnv.pixel_scale}, {pnv.projection}") + print(f"\t{current_path}: {current.pixel_scale}, {current.projection}") + sys.exit(-1) + + for layer in [pnv, current]: + layer.set_window_for_intersection(intersection) + + calc = current.numpy_apply( + lambda a, b: np.where(np.isin(a, map_replacement_codes), b, a), + pnv + ) + + with RasterLayer.empty_raster_layer_like( + current, + filename=output_path, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Zenodo resource downloader.") + parser.add_argument( + '--pnv', + type=str, + help='Path of PNV map', + required=True, + dest='pnv_path', + ) + parser.add_argument( + '--currentl2', + type=str, + help='Path of current L2 map', + required=True, + dest='current_path', + ) + parser.add_argument( + '--crosswalk', + type=str, + help='Path of map to IUCN crosswalk table', + required=True, + dest='crosswalk_path', + ) + parser.add_argument( + 
'--output', + type=str, + help='Path where final map should be stored', + required=True, + dest='results_path', + ) + parser.add_argument( + '-j', + type=int, + help='Number of concurrent threads to use for calculation.', + required=False, + default=None, + dest='concurrency', + ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) + args = parser.parse_args() + + make_restore_map( + args.pnv_path, + args.current_path, + args.crosswalk_path, + args.results_path, + args.concurrency, + args.show_progress, + ) + +if __name__ == "__main__": + main() diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py new file mode 100644 index 0000000..a84eddc --- /dev/null +++ b/prepare-species/extract_species_psql.py @@ -0,0 +1,351 @@ +import argparse +import importlib +import logging +import os +from functools import partial +from multiprocessing import Pool +from typing import Optional, Tuple + +# import pyshark # pylint: disable=W0611 +import geopandas as gpd +import pyproj +import psycopg2 +import shapely +from postgis.psycopg import register + +aoh_cleaning = importlib.import_module("aoh-calculator.cleaning") + +logger = logging.getLogger(__name__) +logging.basicConfig() +logger.setLevel(logging.DEBUG) + +SEASON_NAME = { + 1: "RESIDENT", + 2: "BREEDING", + 3: "NONBREEDING", +} + +COLUMNS = [ + "id_no", + "season", + "elevation_lower", + "elevation_upper", + "full_habitat_code", + "geometry" +] + +MAIN_STATEMENT = """ +SELECT + assessments.sis_taxon_id as id_no, + assessments.id as assessment_id, + (assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower, + (assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper +FROM + assessments + LEFT JOIN taxons ON taxons.id = assessments.taxon_id + LEFT JOIN assessment_supplementary_infos ON assessment_supplementary_infos.assessment_id = assessments.id + LEFT JOIN red_list_category_lookup ON red_list_category_lookup.id = assessments.red_list_category_id +WHERE + assessments.latest = true + AND taxons.class_name = %s + AND red_list_category_lookup.code NOT IN ('EX') +""" + +HABITATS_STATEMENT = """ +SELECT + assessment_habitats.supplementary_fields->>'season', + assessment_habitats.supplementary_fields->>'majorImportance', + STRING_AGG(habitat_lookup.code, '|') AS full_habitat_code, + STRING_AGG(system_lookup.description->>'en', '|') AS systems +FROM + assessments + LEFT JOIN assessment_habitats ON assessment_habitats.assessment_id = assessments.id + LEFT JOIN habitat_lookup on habitat_lookup.id = assessment_habitats.habitat_id + LEFT JOIN assessment_systems ON assessment_systems.assessment_id = assessments.id + LEFT JOIN system_lookup ON assessment_systems.system_lookup_id = system_lookup.id +WHERE + assessments.id = %s + AND ( + -- LIFE ignores marginal suitability, and ignores majorImportance + assessment_habitats.supplementary_fields->>'suitability' IS NULL + OR assessment_habitats.supplementary_fields->>'suitability' IN ('Suitable', 'Unknown') + ) +GROUP BY (assessment_habitats.supplementary_fields->>'season', assessment_habitats.supplementary_fields->>'majorImportance') +""" + +GEOMETRY_STATEMENT = """ +SELECT + assessment_ranges.seasonal, + ST_UNION(assessment_ranges.geom::geometry) OVER (PARTITION BY assessment_ranges.seasonal) AS geometry +FROM + assessments + LEFT JOIN assessment_ranges On 
assessment_ranges.assessment_id = assessments.id
+WHERE
+    -- LIFE doesn't use passage (season 4), and treats unknown (season 5) as resident.
+    assessments.id = %s
+    AND assessment_ranges.presence IN %s
+    AND assessment_ranges.origin IN (1, 2, 6)
+    AND assessment_ranges.seasonal IN (1, 2, 3, 5)
+"""
+
+DB_HOST = os.getenv("DB_HOST")
+DB_PORT = os.getenv("DB_PORT", "5432")
+DB_NAME = os.getenv("DB_NAME")
+DB_USER = os.getenv("DB_USER")
+DB_PASSWORD = os.getenv("DB_PASSWORD")
+DB_CONFIG = (
+    f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
+)
+
+def tidy_reproject_save(
+    gdf: gpd.GeoDataFrame,
+    output_directory_path: str
+) -> None:
+    # The geometry is in CRS 4326, but the AoH work is done in World_Behrmann, aka Projected CRS: ESRI:54017
+    src_crs = pyproj.CRS.from_epsg(4326)
+    target_crs = src_crs #pyproj.CRS.from_string(target_projection)
+
+    graw = gdf.loc[0].copy()
+    grow = aoh_cleaning.tidy_data(graw)
+    os.makedirs(output_directory_path, exist_ok=True)
+    output_path = os.path.join(output_directory_path, f"{grow.id_no}_{grow.season}.geojson")
+    res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry")
+    res_projected = res.to_crs(target_crs)
+    res_projected.to_file(output_path, driver="GeoJSON")
+
+
+def process_row(
+    output_directory_path: str,
+    presence: Tuple[int, ...],
+    row: Tuple,
+) -> None:
+
+    connection = psycopg2.connect(DB_CONFIG)
+    register(connection)
+    cursor = connection.cursor()
+
+
+    id_no, assessment_id, elevation_lower, elevation_upper = row
+
+    cursor.execute(HABITATS_STATEMENT, (assessment_id,))
+    raw_habitats = cursor.fetchall()
+
+    if len(raw_habitats) == 0:
+        logger.debug("Dropping %s as no habitats found", id_no)
+        return
+
+    # Clean up habitats to ensure they're unique (the system agg in the SQL statement might duplicate them)
+    # In the database there are the following seasons:
+    #     breeding
+    #     Breeding Season
+    #     non-breeding
+    #     Non-Breeding Season
+    #     passage
+    #     Passage
+    #     resident
+    #     Resident
+    #     Seasonal Occurrence Unknown
+    #     unknown
+    #     null
+
+    habitats = {}
+    for season, major_importance, habitat_values, systems in raw_habitats:
+
+        match season:
+            case 'passage' | 'Passage':
+                continue
+            case 'resident' | 'Resident' | 'Seasonal Occurrence Unknown' | 'unknown' | None:
+                season_code = 1
+            case 'breeding' | 'Breeding Season':
+                season_code = 2
+            case 'non-breeding' | 'Non-Breeding Season':
+                season_code = 3
+            case _:
+                raise ValueError(f"Unexpected season {season} for {id_no}")
+
+        if systems is None:
+            logger.debug("Dropping %s: no systems in DB", id_no)
+            continue
+        if "Marine" in systems:
+            logger.debug("Dropping %s: marine in systems", id_no)
+            return
+
+        if habitat_values is None:
+            logger.debug("Dropping %s: no habitats in DB", id_no)
+            continue
+        habitat_set = set(habitat_values.split('|'))
+        if len(habitat_set) == 0:
+            continue
+        if any(x.startswith('7') for x in habitat_set) and major_importance == 'Yes':
+            logger.debug("Dropping %s: Habitat 7 in habitat list", id_no)
+            return
+
+        try:
+            habitats[season_code] |= habitat_set
+        except KeyError:
+            habitats[season_code] = habitat_set
+
+    if len(habitats) == 0:
+        logger.debug("Dropping %s: No habitats", id_no)
+        return
+
+    cursor.execute(GEOMETRY_STATEMENT, (assessment_id, presence))
+    geometries_data = cursor.fetchall()
+    if len(geometries_data) == 0:
+        logger.info("Dropping %s: no geometries", id_no)
+        return
+    geometries = {}
+    for season, geometry in geometries_data:
+        grange = shapely.normalize(shapely.from_wkb(geometry.to_ewkb()))
+
+        match season:
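+            # assessment_ranges.seasonal codes: 1 = resident, 2 = breeding, 3 = non-breeding,
+            # 5 = unknown (treated as resident); 4 (passage) is already filtered out in
+            # GEOMETRY_STATEMENT above.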
case 1 | 5: + season_code = 1 + case 2 | 3: + season_code = season + case _: + raise ValueError(f"Unexpected season: {season}") + + try: + geometries[season_code] = shapely.union(geometries[season_code], grange) + except KeyError: + geometries[season_code] = grange + + seasons = set(geometries.keys()) | set(habitats.keys()) + + if seasons == {1}: + # Resident only + gdf = gpd.GeoDataFrame( + [[ + id_no, + SEASON_NAME[1], + int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, + '|'.join(list(habitats[1])), + geometries[1] + ]], + columns=COLUMNS, + crs='epsg:4326' + ) + tidy_reproject_save(gdf, output_directory_path) + else: + # Breeding and non-breeding + # Sometimes in the IUCN database there's only data on one season (e.g., AVES 103838515), and so + # we need to do another sanity check to make sure both have useful data before we write out + + geometries_seasons_breeding = set(geometries.keys()) + geometries_seasons_breeding.discard(3) + geometries_breeding = [geometries[x] for x in geometries_seasons_breeding] + if len(geometries_breeding) == 0: + logger.debug("Dropping %s as no breeding geometries", id_no) + return + geometry_breeding = shapely.union_all(geometries_breeding) + + geometries_seasons_non_breeding = set(geometries.keys()) + geometries_seasons_non_breeding.discard(2) + geometries_non_breeding = [geometries[x] for x in geometries_seasons_non_breeding] + if len(geometries_non_breeding) == 0: + logger.debug("Dropping %s as no non-breeding geometries", id_no) + return + geometry_non_breeding = shapely.union_all(geometries_non_breeding) + + habitats_seasons_breeding = set(habitats.keys()) + habitats_seasons_breeding.discard(3) + habitats_breeding = set() + for season in habitats_seasons_breeding: + habitats_breeding |= habitats[season] + if len(habitats_breeding) == 0: + logger.debug("Dropping %s as no breeding habitats", id_no) + return + + habitats_seasons_non_breeding = set(habitats.keys()) + habitats_seasons_non_breeding.discard(2) + habitats_non_breeding = set() + for season in habitats_seasons_non_breeding: + habitats_non_breeding |= habitats[season] + if len(habitats_non_breeding) == 0: + logger.debug("Dropping %s as no non-breeding habitats", id_no) + return + + gdf = gpd.GeoDataFrame( + [[ + id_no, + SEASON_NAME[2], + int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, + '|'.join(list(habitats_breeding)), + geometry_breeding + ]], + columns=COLUMNS, + crs='epsg:4326' + ) + tidy_reproject_save(gdf, output_directory_path) + + gdf = gpd.GeoDataFrame( + [[ + id_no, SEASON_NAME[3], + int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, + '|'.join(list(habitats_non_breeding)), + geometry_non_breeding + ]], + columns=COLUMNS, + crs='epsg:4326', + ) + tidy_reproject_save(gdf, output_directory_path) + + +def extract_data_per_species( + classname: str, + output_directory_path: str, + _target_projection: Optional[str], +) -> None: + + connection = psycopg2.connect(DB_CONFIG) + cursor = connection.cursor() + + for era, presence in [("current", (1, 2)), ("historic", (1, 2, 4, 5))]: + era_output_directory_path = os.path.join(output_directory_path, era) + + cursor.execute(MAIN_STATEMENT, (classname,)) + # This can be quite big (tens of thousands), but in modern computer term is quite small + # and I need to make a follow on DB query per result. 
+        results = cursor.fetchall()
+
+        logger.info("Found %d species in class %s in scenario %s", len(results), classname, era)
+
+        # The limiting amount here is how many concurrent connections the database can take
+        with Pool(processes=20) as pool:
+            pool.map(partial(process_row, era_output_directory_path, presence), results)
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Process aggregate species data to per-species files.")
+    parser.add_argument(
+        '--class',
+        type=str,
+        help="Species class name",
+        required=True,
+        dest="classname",
+    )
+    parser.add_argument(
+        '--output',
+        type=str,
+        help='Directory where per species GeoJSON is stored',
+        required=True,
+        dest='output_directory_path',
+    )
+    parser.add_argument(
+        '--projection',
+        type=str,
+        help="Target projection",
+        required=False,
+        dest="target_projection",
+        default="ESRI:54017"
+    )
+    args = parser.parse_args()
+
+    extract_data_per_species(
+        args.classname,
+        args.output_directory_path,
+        args.target_projection
+    )
+
+if __name__ == "__main__":
+    main()
diff --git a/prepare-species/readme.md b/prepare-species/readme.md
new file mode 100644
index 0000000..4fb3560
--- /dev/null
+++ b/prepare-species/readme.md
@@ -0,0 +1,25 @@
+# Species selection for LIFE
+
+LIFE is currently based on the following species selection criteria from the IUCN Red List of endangered species. In general we align with the guidelines set out in [Recent developments in the production of Area of Habitat (AOH) maps for terrestrial vertebrates.]() by Busana et al.
+
+* We select from the classes AMPHIBIA, AVES, MAMMALIA, and REPTILIA.
+* We exclude species that are categorised as:
+    * Extinct
+    * Not endangered
+    * Data deficient
+* We select the most recent assessment for each species.
+* When selecting habitats we ignore those whose suitability is marked as "marginal".
+* For ranges:
+    * We select for origin codes 1, 2 and 6 (native, reintroduced, and assisted colonization).
+    * LIFE generates data under both current and historic scenarios, and so the selection process for ranges is different for each scenario:
+        * For current, we select presence codes 1 and 2 (Extant and Probably Extant).
+        * For historic, we select presence codes 1, 2, 4, and 5 (Extant, Probably Extant, Possibly Extinct, and Extinct).
+    * Seasonality is selected from the categories Resident, Breeding, and Non-breeding. These are then combined in the following way:
+        * For species with only a resident range, we treat them as resident only.
+        * For species that are migratory (having either a breeding or non-breeding range), we generate both a breeding and a non-breeding range, where each is the union of the respective migratory range (if present) and the resident range (if present).
+* For metadata, we apply the following hygiene steps (see the sketch below):
+    * If the lower elevation is missing, or less than the expected minimum, we set it to that minimum: -500m.
+    * If the upper elevation is missing, or over the expected maximum, we set it to that maximum: 9000m.
+    * If the lower elevation is greater than the upper, we swap the two values.
+    * If the difference between the two values is less than 50m, each value is adjusted equally out from the centre to ensure that they are 50m apart.
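+
+The elevation hygiene is applied during extraction via `aoh-calculator`'s `cleaning.tidy_data`. The following is only an illustrative sketch of the rules listed above, using the constants given in this document; the real implementation may differ in detail:
+
+```python
+ELEVATION_MIN = -500.0   # expected minimum elevation (metres)
+ELEVATION_MAX = 9000.0   # expected maximum elevation (metres)
+MIN_BAND = 50.0          # minimum allowed elevation band (metres)
+
+def clean_elevation(lower, upper):
+    """Sketch of the metadata hygiene rules above (illustrative only)."""
+    # Missing or out-of-range values are clamped to the expected bounds
+    lower = ELEVATION_MIN if lower is None or lower < ELEVATION_MIN else lower
+    upper = ELEVATION_MAX if upper is None or upper > ELEVATION_MAX else upper
+    # If the two values are inverted, swap them
+    if lower > upper:
+        lower, upper = upper, lower
+    # Pad equally out from the centre so the band is at least 50m wide
+    if upper - lower < MIN_BAND:
+        centre = (upper + lower) / 2
+        lower, upper = centre - MIN_BAND / 2, centre + MIN_BAND / 2
+    return lower, upper
+```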
+ diff --git a/readmeta.py b/readmeta.py deleted file mode 100644 index 6fb2c10..0000000 --- a/readmeta.py +++ /dev/null @@ -1,43 +0,0 @@ -import json -import sys -import time - -import pyarrow.parquet as pq - -def main() -> None: - if len(sys.argv) != 2: - print(f"Usage: {sys.argv[0]}", file=sys.stderr) - sys.exit(1) - - file = pq.read_table(sys.argv[1]) - metadata = file.schema.metadata - - try: - arkmetadata = metadata[b"experiment"] - except KeyError: - print("No ARK metadata on this file", file=sys.stderr) - sys.exit(1) - - try: - info = json.loads(arkmetadata) - except ValueError as exc: - print("Unable to decode ARK metadata: %e", exc, file=sys.stderr) - sys.exit(1) - - keys = list(info.keys()) - keys.sort() - maxlen = 0 - for k in keys: - if len(k) > maxlen: - maxlen = len(k) - - for k in keys: - if k == 'timestamp': - val = time.ctime(info[k]) - else: - val = info[k] - - print(f'{k}{" " * (maxlen - len(k))}\t{val}') - -if __name__ == "__main__": - main() diff --git a/requirements.txt b/requirements.txt index 9e8f6a8..caf5fc9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,18 @@ -requests -numpy -gdal[numpy]<3.7 +geopandas +numpy<2 pyarrow pandas -geopandas -h3==4.0.0b1 +psutil scipy +pyproj +psycopg2 +postgis +scikit-image rasterio +requests +alive-progress + +gdal[numpy]==3.9.3 git+https://github.com/carboncredits/iucn_modlib.git git+https://github.com/carboncredits/yirgacheffe diff --git a/scripts/run.sh b/scripts/run.sh new file mode 100755 index 0000000..5bd3b17 --- /dev/null +++ b/scripts/run.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# +# Assumes you've set up a python virtual environement in the current directory. +# +# In addition to the Python environemnt, you will need the following extra command line tools: +# +# https://github.com/quantifyearth/reclaimer - used to download inputs from Zenodo directly +# https://github.com/quantifyearth/littlejohn - used to run batch jobs in parallel + +set -e + +if [ -z "${DATADIR}" ]; then + echo "Please specify $DATADIR" + exit 1 +fi + + +if [ -z "${VIRTUAL_ENV}" ]; then + echo "Please specify run in a virtualenv" + exit 1 +fi + +declare -a CURVES=("0.1" "0.25" "0.5" "1.0" "gompertz") + +# Get habitat layer and prepare for use +reclaimer zenodo --zenodo_id 4058819 \ + --filename iucn_habitatclassification_composite_lvl2_ver004.zip \ + --extract \ + --output ${DATADIR}/habitat/jung_l2_raw.tif + +python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/current_raw.tif \ + --scale 0.016666666666667 \ + --output ${DATADIR}/habitat_maps/current/ + +# Get PNV layer and prepare for use +reclaimer zenodo --zenodo_id 4038749 \ + --filename pnv_lvl1_004.zip \ + --extract \ + --output ${DATADIR}/habitat/pnv_raw.tif + +python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/pnv_raw.tif \ + --scale 0.016666666666667 \ + --output ${DATADIR}/habitat_maps/pnv/ + +# Generate an area scaling map +python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output ${DATADIR}/habitat/area-per-pixel.tif + +# Generate the arable scenario map +python3 ./prepare-layers/make_arable_map.py --current ${DATADIR}/habitat/current_raw.tif \ + --output ${DATADIR}/habitat/arable.tif + +python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/arable.tif \ + --scale 0.016666666666667 \ + --output ${DATADIR}/habitat_maps/arable/ + +python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \ + --scenario ${DATADIR}/habitat/arable.tif \ + --area ${DATADIR}/area-per-pixel.tif \ 
+ --scale 0.016666666666667 \ + --output ${DATADIR}/habitat/arable_diff_area.tif + +# Generate the restore map +python3 ./prepare-layers/make_restore_map.py --pnv ${DATADIR}/habitat/pnv_raw.tif \ + --current ${DATADIR}/habitat/current_raw.tif \ + --crosswalk ${DATADIR}/crosswalk.csv \ + --output ${DATADIR}/habitat/restore.tif + +python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/restore.tif \ + --scale 0.016666666666667 \ + --output ${DATADIR}/habitat_maps/restore/ + +python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \ + --scenario ${DATADIR}/habitat/restore.tif \ + --area ${DATADIR}/area-per-pixel.tif \ + --scale 0.016666666666667 \ + --output ${DATADIR}/habitat/restore_diff_area.tif + +# Fetch and prepare the elevation layers +reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output ${DATADIR}/elevation.tif +gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation.tif ${DATADIR}/elevation-max-1k.tif +gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation.tif ${DATADIR}/elevation-min-1k.tif + +# Get species data per taxa from IUCN data +python3 ./prepare-species/extract_species_psql.py --class AVES --output ${DATADIR}/species-info/AVES/ --projection "EPSG:4326" +python3 ./prepare-species/extract_species_psql.py --class AMPHIBIA --output ${DATADIR}/species-info/AMPHIBIA/ --projection "EPSG:4326" +python3 ./prepare-species/extract_species_psql.py --class MAMMALIA --output ${DATADIR}/species-info/MAMMALIA/ --projection "EPSG:4326" +python3 ./prepare-species/extract_species_psql.py --class REPTILIA --output ${DATADIR}/species-info/REPTILIA/ --projection "EPSG:4326" + +# Generate the batch job input CSVs +python3 ./utils/speciesgenerator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/aohbatch.csv +python3 ./utils/persistencegenerator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/persistencebatch.csv + +# Calculate all the AoHs +littlejohn -j 200 -o ${DATADIR}/aohbatch.log -c ${DATADIR}/aohbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./aoh-calculator/aohcalc.py --force-habitat + +# Calculate the per species Delta P values +littlejohn -j 200 -o ${DATADIR}/persistencebatch.log -c ${DATADIR}/persistencebatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./deltap/global_code_residents_pixel.py + +for CURVE in "${CURVES[@]}" +do + # Per scenario per taxa sum the delta Ps + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/REPTILIA/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/REPTILIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/AVES/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/AVES.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/MAMMALIA/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/MAMMALIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/AMPHIBIA/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/AMPHIBIA.tif + + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/${CURVE}/MAMMALIA/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/MAMMALIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/${CURVE}/AMPHIBIA/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/AMPHIBIA.tif + python3 ./utils/raster_sum.py 
--rasters_directory ${DATADIR}/deltap/restore/${CURVE}/REPTILIA/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/REPTILIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/${CURVE}/AVES/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/AVES.tif + + # Generate final map + python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/restore/${CURVE}/ \ + --diffmap ${DATADIR}/habitat/restore_diff_area.tif \ + --output ${DATADIR}/deltap_final/scaled_restore_${CURVE}.tif + + python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/arable/${CURVE}/ \ + --diffmap ${DATADIR}/habitat/arable_diff_area.tif \ + --output ${DATADIR}/deltap_final/scaled_arable_${CURVE}.tif +done + +for CURVE in "${CURVES[@]}" +do + if [ "${CURVE}" == "0.25" ]; then + continue + fi + python3 ./utils/regression_plot.py --a ${DATADIR}/deltap_final/summed_scaled_arable_${CURVE}.tif \ + --b ${DATADIR}/deltap_final/summed_scaled_arable_0.25.tif \ + --output {$DATADIR}/analysis/arable_0.25_vs_${CURVE}.png + + python3 ./utils/regression_plot.py --a ${DATADIR}/deltap_final/summed_scaled_restore_${CURVE}.tif \ + --b ${DATADIR}/deltap_final/summed_scaled_restore_0.25.tif \ + --output {$DATADIR}/analysis/restore_0.25_vs_${CURVE}.png +done + +python3 ./predictors/species_richness.py --aohs_folder ${DATADIR}/aohs/current/ --output ${DATADIR}/predictors/species_richness.tif diff --git a/speciesgenerator.py b/speciesgenerator.py deleted file mode 100644 index e153507..0000000 --- a/speciesgenerator.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import contextlib -import json -import sys -from typing import List, Set, Tuple - -import geopandas as gpd -import pandas as pd - -#from aoh.apps.lib import seasonality -import seasonality -from iucn_modlib.classes.Taxon import Taxon -from iucn_modlib.factories import TaxonFactories - -@contextlib.contextmanager -def file_writer(file_name = None): - writer = open(file_name, "w", encoding="utf-8") if file_name is not None else sys.stdout - yield writer - if file_name: - writer.close() - -def project_species_list(project: str, ranges: str) -> List[Tuple[int, str]]: - ''' Returns a list of species that have ranges that intersect with project polygon - To Check: does it give the correct answers? , What do we want the output to be like? - (id_nos are float but should maybe be int) - - Parameters: - Project: the file address of a project polygon - Ranges: the file address of the species' range polygons - - Output: dictionary of id_no and binomial for species that are present - ''' - # IMPORT PROJECT POLYGON - project_polygon = gpd.read_file(project) - # IMPORT SPECIES RANGES FILTERED BY WHETHER THEY INTERSECT WITH THE PROJECT POLYGON - ranges_gdf = gpd.read_file(ranges, mask=project_polygon) - # CONVERT TO DATAFRAME - # Note: Not sure if all of these steps are necessary - ranges_df = pd.DataFrame(ranges_gdf) # I think stops it being a spatial database? 
- # EXTRACT A LIST OF UNIQUE ID_NO and UNIQUE BIOMIALS - id_list = [int(x) for x in ranges_df['id_no'].unique().tolist()] - binomial_list = ranges_df['binomial'].unique().tolist() - return zip(id_list, binomial_list) - -def seasonality_for_species(species: Taxon, range_file: str) -> Set[str]: - og_seasons = set( - seasonality.habitatSeasonality(species) + - seasonality.rangeSeasonality(range_file, species.taxonid) - ) - if len(og_seasons) == 0: - return {} - seasons = {'resident'} - if len(og_seasons.difference({'resident'})) > 0: - seasons = {'breeding', 'nonbreeding'} - return seasons - -def main() -> None: - parser = argparse.ArgumentParser(description="Species and seasonality generator.") - parser.add_argument( - '--experiment', - type=str, - help="name of experiment group from configuration json", - required=True, - dest="experiment" - ) - parser.add_argument( - '--config', - type=str, - help="path of configuration json", - required=False, - dest="config_path", - default="config.json" - ) - parser.add_argument( - '--project', - type=str, - help="name of project file geojson", - required=True, - dest="project" - ) - parser.add_argument( - '--output', - type=str, - help="name of output file for csv", - required=False, - dest="output" - ) - parser.add_argument( - '--epochs', - type=str, - help="comma seperated (but no spaces!) list of experiments to run for", - required=True, - dest="epochs" - ) - args = vars(parser.parse_args()) - - try: - with open(args['config_path'], 'r', encoding='utf-8') as config_file: - config = json.load(config_file) - except FileNotFoundError: - print(f'Failed to find configuration json file {args["config_path"]}') - sys.exit(-1) - except json.decoder.JSONDecodeError as exc: - print(f'Failed to parse {args["config_path"]} at line {exc.lineno}, column {exc.colno}: {exc.msg}') - sys.exit(-1) - - try: - experiment = config['experiments'][args['experiment']] - except KeyError: - if not 'experiments' in config: - print("No experiments section founnd in configuration json") - else: - print(f'Failed to find experiment with name {args["experiment"]}. Options found:') - for experiment in config['experiments']: - print(f'\t{experiment}') - sys.exit(-1) - - epoch_list = args['epochs'].split(',') - - try: - range_path = experiment['range'] - except KeyError: - print(f'Experiment "{args["experiment"]}" was missing range key.') - - batch = None - if 'iucn_batch' in experiment: - batch = TaxonFactories.loadBatchSource(experiment['iucn_batch']) - - # Work part 1: get the species list - species_list = project_species_list(args["project"], range_path) - - with file_writer(args["output"]) as output: - output.write('--taxid,--seasonality,--experiment\n') - for species_id, _ in species_list: - if batch: - # try: - species = TaxonFactories.TaxonFactoryRedListBatch(species_id, batch) - # except IndexError as e: - # # Some of the data in the batch needs tidy... 
- # print(f"Oh no {e}") - # continue - else: - try: - species = TaxonFactories.TaxonFactoryRedListAPI(species_id, config['iucn']['api_key']) - except KeyError: - print("Failed to find IUCN API key in config file or batch path in experiment.") - sys.exit(-1) - - seasonality_list = seasonality_for_species(species, range_path) - for season in seasonality_list: - for epoch in epoch_list: - output.write(f'{species_id},{season},{epoch}\n') - -if __name__ == "__main__": - main() diff --git a/tests/test_calculate.py b/tests/test_calculate.py deleted file mode 100644 index 11a27cf..0000000 --- a/tests/test_calculate.py +++ /dev/null @@ -1,80 +0,0 @@ -from typing import Any - -import numpy -import pytest -from osgeo import gdal - -from yirgacheffe import WGS_84_PROJECTION -from yirgacheffe.layers import YirgacheffeLayer, UniformAreaLayer -from yirgacheffe.window import Area, Window -import persistence - -class SingleValueLayer(YirgacheffeLayer): - """Mocked layer to make testing calc function easier""" - def __init__(self, value: Any, width: int, height: int): - self.value = value - area = Area( - left = -180.0, - top = 90.0, - right = 180.0, - bottom = -90.0 - ) - super().__init__(area, None, WGS_84_PROJECTION) - self._window = Window(0, 0, width, height) - - def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: - assert (xoffset + xsize) <= self.window.xsize - assert (yoffset + ysize) <= self.window.ysize - return numpy.ones((ysize, xsize)) * self.value - -@pytest.mark.parametrize( - "habitat,elevation,range,area,habitats,elevation_range,expected_area", - [ - (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 10000.0), 4.0), - (100, 1234.0, False, 4.0, [100, 200, 300], (0.0, 10000.0), 0.0), - (100, 1234.0, True, 4.0, [200, 300], (0.0, 10000.0), 0.0), - (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 100.0), 0.0), - # (100, 1234.0, True, numpy.nan, [100, 200, 300], (0.0, 10000.0), 0.0), - ] -) -def test_calculate_simple(habitat,elevation,range,area,habitats,elevation_range,expected_area): - habitat_layer = SingleValueLayer(habitat, 1, 1) - elevation_layer = SingleValueLayer(elevation, 1, 1) - range_layer = SingleValueLayer(range, 1, 1) - area_layer = SingleValueLayer(area, 1, 1) - - persistence.YSTEP = 1 - area = persistence._calculate_cpu( - range_layer, - habitat_layer, - habitats, - elevation_layer, - elevation_range, - area_layer, - None - ) - assert area == expected_area - -@pytest.mark.parametrize("step_size", [1, 2, 3, 9, 10, 11]) -def test_calculate_step_sizes(step_size): - habitat_layer = SingleValueLayer(100, 10, 10) - elevation_layer = SingleValueLayer(1234.0, 10, 10) - range_layer = SingleValueLayer(True, 10, 10) - - # we want a non uniform area to make this interesting - area_dataset = gdal.GetDriverByName('mem').Create('mem', 1, 10, 1, gdal.GDT_Float32, []) - area_dataset.SetGeoTransform([-180.0, 180.0, 0.0, 90.0, 0.0, -18.0]) - area_dataset.GetRasterBand(1).WriteArray(numpy.array([[float(x)] for x in range(1, 11)]), 0, 0) - area_layer = UniformAreaLayer(area_dataset) - - persistence.YSTEP = step_size - area = persistence._calculate_cpu( - range_layer, - habitat_layer, - [100, 200, 300], - elevation_layer, - (0.0, 10000.0), - area_layer, - None - ) - assert area == 550.0 diff --git a/tiles2tiff.py b/tiles2tiff.py deleted file mode 100644 index 79fe337..0000000 --- a/tiles2tiff.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys - -from osgeo import gdal -import pandas as pd - -from yirgacheffe import WSG_84_PROJECTION -from 
yirgacheffe.window import Area, PixelScale -from yirgacheffe.layers import RasterLayer, H3CellLayer, YirgacheffeLayer - -def main() -> None: - if len(sys.argv) != 3: - print(f'USAGE: {sys.argv[0]} CSV TIF') - sys.exit(-1) - filename = sys.argv[1] - - # Make up the geo transform based on image resolution - width, height = 3840.0, 2180.0 # 4K screen - scale = PixelScale(360.0 / width, -180.0/height) - area = Area(left=-180.0, right=180, top=90, bottom=-90) - - ext = os.path.splitext(filename)[1] - if ext == '.parquet': - tiles_df = pd.read_parquet(filename) - elif ext == '.csv': - tiles_df = pd.read_csv(filename, index_col=False) - elif ext == '.hdf5': - tiles_df = pd.read_hdf(filename) - else: - print(f'unrecognised data type {ext}') - sys.exit(-1) - - # Every time you write to a gdal layer that has a file store you - # risk it trying to save the compressed file, which is slow. So - # we first use a memory only raster layer, and then at the end save - # the result we built up out to file. - scratch = RasterLayer.empty_raster_layer(area, scale, gdal.GDT_Float64) - - for _, tile, area in tiles_df.itertuples(): - if area == 0.0: - continue - try: - tile_layer = H3CellLayer(tile, scale, WSG_84_PROJECTION) - except ValueError: - print(f"Skipping tile with invalid id: {tile}") - continue - - scratch.reset_window() - layers = [scratch, tile_layer, scratch] - intersection = YirgacheffeLayer.find_intersection(layers) - for layer in layers: - layer.set_window_for_intersection(intersection) - - result = scratch + (tile_layer * area) - result.save(scratch) - - # now we've done the calc in memory, save it to a file - scratch.reset_window() - output = RasterLayer.empty_raster_layer_like(scratch, filename=sys.argv[2]) - scratch.save(output) - -if __name__ == "__main__": - main() diff --git a/unionsum.py b/unionsum.py deleted file mode 100644 index 49166ed..0000000 --- a/unionsum.py +++ /dev/null @@ -1,58 +0,0 @@ -from math import ceil -import os -import shutil -import sys -import tempfile - -from osgeo import gdal -from yirgacheffe.layers import Layer - -def main(): - layers = [Layer.layer_from_file(x) for x in sys.argv[1:]] - area = Layer.find_union(layers) - - for layer in layers: - layer.set_window_for_union(area) - pixel_pitch = layers[0].pixel_scale - - driver = gdal.GetDriverByName('GTiff') - with tempfile.TemporaryDirectory() as tempdir: - tmp_filename = os.path.join(tempdir, "results.tif") - - dataset = driver.Create( - tmp_filename, - ceil((area.right - area.left) / pixel_pitch[0]), - ceil((area.top - area.bottom) / (pixel_pitch[1] * -1)), - 1, - gdal.GDT_Float32, - [] - ) - if dataset is None: - print(f"Failed to create {tmp_filename}") - sys.exit(-1) - - dataset.SetGeoTransform([ - area.left, pixel_pitch[0], 0.0, area.top, 0.0, pixel_pitch[1] - ]) - dataset.SetProjection(layers[0].projection) - - output_band = dataset.GetRasterBand(1) - pixel_width = layers[0].window.xsize - pixel_height = layers[0].window.ysize - - for yoffset in range(pixel_height): - first = layers[0].read_array(0, yoffset, pixel_width, 1) - for other_layer in layers[1:]: - other = other_layer.read_array(0, yoffset, pixel_width, 1) - first = first + other - # Uncomment the below line to help see everything in QGIS - # first = numpy.logical_and(first > 0.0, True) - output_band.WriteArray(first, 0, yoffset) - - # Force a close on the dataset and move it to the final location - del dataset - shutil.move(tmp_filename, "result.tif") - - -if __name__ == "__main__": - main() diff --git a/utils/persistencegenerator.py 
b/utils/persistencegenerator.py new file mode 100644 index 0000000..aa39672 --- /dev/null +++ b/utils/persistencegenerator.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +import argparse +import os + +import pandas as pd + +def species_generator( + input_dir: str, + data_dir: str, + output_csv_path: str +): + taxas = os.listdir(input_dir) + + res = [] + for taxa in taxas: + taxa_path = os.path.join(input_dir, taxa, 'current') + speciess = os.listdir(taxa_path) + for scenario in ['arable', 'restore']: + for species in speciess: + for curve in ["0.1", "0.25", "0.5", "1.0", "gompertz"]: + res.append([ + os.path.join(data_dir, 'species-info', taxa, 'current', species), + os.path.join(data_dir, 'aohs', 'current', taxa), + os.path.join(data_dir, 'aohs', scenario, taxa), + os.path.join(data_dir, 'aohs', 'pnv', taxa), + curve, + os.path.join(data_dir, 'deltap', scenario, curve, taxa), + ]) + + + df = pd.DataFrame(res, columns=[ + '--speciesdata', + '--current_path', + '--scenario_path', + '--historic_path', + '--z', + '--output_path', + ]) + df.to_csv(output_csv_path, index=False) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Species and seasonality generator.") + parser.add_argument( + '--input', + type=str, + help="directory with taxa folders of species info", + required=True, + dest="input_dir" + ) + parser.add_argument( + '--datadir', + type=str, + help="directory for results", + required=True, + dest="data_dir", + ) + parser.add_argument( + '--output', + type=str, + help="name of output file for csv", + required=True, + dest="output" + ) + args = parser.parse_args() + + species_generator(args.input_dir, args.data_dir, args.output) + +if __name__ == "__main__": + main() diff --git a/raster_sum.py b/utils/raster_sum.py similarity index 73% rename from raster_sum.py rename to utils/raster_sum.py index 199964e..fc7579d 100644 --- a/raster_sum.py +++ b/utils/raster_sum.py @@ -24,36 +24,40 @@ def worker( if path is None: break - partial_raster = RasterLayer.layer_from_file(path) - - if merged_result is None: - merged_result = RasterLayer.empty_raster_layer_like(partial_raster, datatype=gdal.GDT_Float64) - cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) - cleaned_raster.save(merged_result) - else: - merged_result.reset_window() - - union = YirgacheffeLayer.find_union([merged_result, partial_raster]) - merged_result.set_window_for_union(union) - partial_raster.set_window_for_union(union) - - calc = merged_result + (partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0))) - temp = RasterLayer.empty_raster_layer_like(merged_result, datatype=gdal.GDT_Float64) - calc.save(temp) - merged_result = temp - - final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) - assert merged_result is not None - merged_result.save(final) - del merged_result + with RasterLayer.layer_from_file(path) as partial_raster: + if merged_result is None: + merged_result = RasterLayer.empty_raster_layer_like(partial_raster, datatype=gdal.GDT_Float64) + cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + cleaned_raster.save(merged_result) + else: + merged_result.reset_window() + + union = YirgacheffeLayer.find_union([merged_result, partial_raster]) + merged_result.set_window_for_union(union) + partial_raster.set_window_for_union(union) + + calc = merged_result + ( + partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + ) + 
temp = RasterLayer.empty_raster_layer_like(merged_result, datatype=gdal.GDT_Float64) + calc.save(temp) + merged_result = temp + + if merged_result: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) def build_k( images_dir: str, output_filename: str, processes_count: int ) -> None: + result_dir, filename = os.path.split(output_filename) + os.makedirs(result_dir, exist_ok=True) files = [os.path.join(images_dir, x) for x in glob.glob("*.tif", root_dir=images_dir)] + if not files: + sys.exit(f"No files in {images_dir}, aborting") with tempfile.TemporaryDirectory() as tempdir: with Manager() as manager: @@ -85,7 +89,6 @@ def build_k( time.sleep(1) # here we should have now a set of images in tempdir to merge - result_dir, filename = os.path.split(output_filename) single_worker = Process(target=worker, args=( filename, result_dir, diff --git a/utils/regression_plot.py b/utils/regression_plot.py new file mode 100644 index 0000000..4605926 --- /dev/null +++ b/utils/regression_plot.py @@ -0,0 +1,90 @@ +import argparse +import functools +import operator +import os +import random +import sys +from multiprocessing import Pool, cpu_count + +import matplotlib.pyplot as plt +import numpy as np +from yirgacheffe.layers import RasterLayer + +def filter_data(chunks): + a_chunk, b_chunk = chunks + res = [] + for a, b in zip(a_chunk, b_chunk): + if np.isnan(a) or np.isnan(b): + continue + if a == 0.0 and b == 0.0: + continue + res.append((a, b)) + return res + +def regression_plot( + a_path: str, + b_path: str, + output_path: str, +) -> None: + output_dir, _ = os.path.split(output_path) + os.makedirs(output_dir, exist_ok=True) + + with RasterLayer.layer_from_file(a_path) as a_layer: + with RasterLayer.layer_from_file(b_path) as b_layer: + if a_layer.pixel_scale != b_layer.pixel_scale: + sys.exit("GeoTIFFs have different pixel scale") + if a_layer.area != b_layer.area: + sys.exit("GeoTIFFs have different spatial coordinates") + if a_layer.window != b_layer.window: + sys.exit("GeoTIFFs have different pixel dimensions") + + a_pixels = a_layer.read_array(0, 0, a_layer.window.xsize, a_layer.window.ysize) + b_pixels = b_layer.read_array(0, 0, b_layer.window.xsize, b_layer.window.ysize) + + with Pool(processes=cpu_count() // 2) as pool: + filtered_chunk_pairs = pool.map(filter_data, zip(a_pixels, b_pixels)) + filtered_pairs = functools.reduce(operator.iconcat, filtered_chunk_pairs, []) + sampled_pairs = random.sample(filtered_pairs, len(filtered_pairs) // 10) + a_filtered, b_filtered = zip(*sampled_pairs) + + # m, b = np.polyfit(a_filtered, b_filtered, 1) + + _fig, ax = plt.subplots() + ax.scatter(x=a_filtered, y=b_filtered, marker=",") + plt.xlabel(os.path.basename(a_path)) + plt.ylabel(os.path.basename(b_path)) + plt.savefig(output_path) + +def main() -> None: + parser = argparse.ArgumentParser(description="Generates a scatter plot comparing two GeoTIFFs.") + parser.add_argument( + "--a", + type=str, + required=True, + dest="a", + help="First GeoTIFF" + ) + parser.add_argument( + "--b", + type=str, + required=True, + dest="b", + help="Second GeoTIFF" + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output", + help="Destination png file for results." 
+ ) + args = parser.parse_args() + + regression_plot( + args.a, + args.b, + args.output, + ) + +if __name__ == "__main__": + main() diff --git a/utils/speciesgenerator.py b/utils/speciesgenerator.py new file mode 100644 index 0000000..24a0eb6 --- /dev/null +++ b/utils/speciesgenerator.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +import argparse +import os + +import pandas as pd + +def species_generator( + input_dir: str, + data_dir: str, + output_csv_path: str +): + taxas = os.listdir(input_dir) + + res = [] + for taxa in taxas: + for scenario in ['current', 'restore', 'arable', 'pnv']: + source = 'historic' if scenario == 'pnv' else 'current' + taxa_path = os.path.join(input_dir, taxa, source) + speciess = os.listdir(taxa_path) + for species in speciess: + res.append([ + os.path.join(os.path.join(data_dir, "habitat_maps"), scenario), + os.path.join(data_dir, "elevation-max-1k.tif"), + os.path.join(data_dir, "elevation-min-1k.tif"), + os.path.join(data_dir, "area-per-pixel.tif"), + os.path.join(data_dir, "crosswalk.csv"), + os.path.join(os.path.join(data_dir, "species-info/"), taxa, source, species), + os.path.join(os.path.join(data_dir, "aohs/"), scenario, taxa) + ]) + + + df = pd.DataFrame(res, columns=[ + '--habitats', + '--elevation-max', + '--elevation-min', + '--area', + '--crosswalk', + '--speciesdata', + '--output' + ]) + df.to_csv(output_csv_path, index=False) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Species and seasonality generator.") + parser.add_argument( + '--input', + type=str, + help="directory with taxa folders of species info", + required=True, + dest="input_dir" + ) + parser.add_argument( + '--datadir', + type=str, + help="directory for results", + required=True, + dest="data_dir", + ) + parser.add_argument( + '--output', + type=str, + help="name of output file for csv", + required=True, + dest="output" + ) + args = parser.parse_args() + + species_generator(args.input_dir, args.data_dir, args.output) + +if __name__ == "__main__": + main() diff --git a/vt316generator.py b/vt316generator.py deleted file mode 100644 index 8948bb1..0000000 --- a/vt316generator.py +++ /dev/null @@ -1,141 +0,0 @@ -import argparse -import contextlib -import json -import sys -from typing import Set -import seasonality -#from aoh.lib import seasonality -from iucn_modlib.classes.Taxon import Taxon -from iucn_modlib.factories import TaxonFactories - -@contextlib.contextmanager -def file_writer(file_name = None): - writer = open(file_name, "w", encoding="utf-8") if file_name is not None else sys.stdout - yield writer - if file_name: - writer.close() - -def seasonality_for_species(species: Taxon, range_file: str) -> Set[str]: - og_seasons = set( - seasonality.habitatSeasonality(species) + - seasonality.rangeSeasonality(range_file, species.taxonid) - ) - if len(og_seasons) == 0: - return {} - seasons = {'resident'} - if len(og_seasons.difference({'resident'})) > 0: - seasons = {'breeding', 'nonbreeding'} - return seasons - -def main() -> None: - parser = argparse.ArgumentParser(description="Species and seasonality generator.") - parser.add_argument( - '--experiment', - type=str, - help="name of experiment group from configuration json", - required=True, - dest="experiment" - ) - parser.add_argument( - '--config', - type=str, - help="path of configuration json", - required=False, - dest="config_path", - default="config.json" - ) - parser.add_argument( - '--list', - type=str, - help="list of all species", - required=True, - dest="list" - ) - parser.add_argument( - 
'--output', - type=str, - help="name of output file for csv", - required=False, - dest="output" - ) - parser.add_argument( - '--epochs', - type=str, - help="comma seperated (but no spaces!) list of experiments to run for", - required=True, - dest="epochs" - ) - parser.add_argument( - '--class', - type=str, - help="Options are 'MAMMALIA', 'AVES', 'AMPHIBIA' and 'REPTILIA'", - required=True, - dest="class" - ) - - args = vars(parser.parse_args()) - - try: - with open(args['config_path'], 'r', encoding='utf-8') as config_file: - config = json.load(config_file) - except FileNotFoundError: - print(f'Failed to find configuration json file {args["config_path"]}') - sys.exit(-1) - except json.decoder.JSONDecodeError as exc: - print(f'Failed to parse {args["config_path"]} at line {exc.lineno}, column {exc.colno}: {exc.msg}') - sys.exit(-1) - - try: - experiment = config['experiments'][args['experiment']] - except KeyError: - if not 'experiments' in config: - print("No experiments section founnd in configuration json") - else: - print(f'Failed to find experiment with name {args["experiment"]}. Options found:') - for experiment in config['experiments']: - print(f'\t{experiment}') - sys.exit(-1) - - epoch_list = args['epochs'].split(',') - - try: - range_path = experiment['range'] - except KeyError: - print(f'Experiment "{args["experiment"]}" was missing range key.') - - batch = None - if 'iucn_batch' in experiment: - batch = TaxonFactories.loadBatchSource(experiment['iucn_batch']) - - # Work part 1: get the species list - with open(args["list"], "r", encoding="utf-8") as listfile: - all_species = listfile.readlines() - - species_class = args['class'] - - species_list = [int(x.split(',')[1]) for x in all_species if species_class in x] - - with file_writer(args["output"]) as output: - output.write('--taxid,--seasonality,--experiment\n') - for species_id in species_list: - if batch: - try: - species = TaxonFactories.TaxonFactoryRedListBatch(species_id, batch) - except IndexError: - # Some of the data in the batch needs tidy... - print(f'{species_id} not in batch') - continue - else: - try: - species = TaxonFactories.TaxonFactoryRedListAPI(species_id, config['iucn']['api_key']) - except KeyError: - print("Failed to find IUCN API key in config file or batch path in experiment.") - sys.exit(-1) - - seasonality_list = seasonality_for_species(species, range_path) - for season in seasonality_list: - for epoch in epoch_list: - output.write(f'{species_id},{season},{epoch}\n') - -if __name__ == "__main__": - main()