From f672abf53b303e473429d3e5b64a70dd8f95516b Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 6 Sep 2024 10:34:46 +0100 Subject: [PATCH 01/36] Remove unused scripts --- h3calculate.py | 375 ------------------------------------------------- mollweide.py | 110 --------------- 2 files changed, 485 deletions(-) delete mode 100644 h3calculate.py delete mode 100644 mollweide.py diff --git a/h3calculate.py b/h3calculate.py deleted file mode 100644 index 53f3a8b..0000000 --- a/h3calculate.py +++ /dev/null @@ -1,375 +0,0 @@ -import itertools -import json -import math -import os -import re -import subprocess -import sys -import time -from multiprocessing import Pool, cpu_count - -import h3 -import pandas as pd -import pyarrow as pa -import pyarrow.parquet as pq -from osgeo import ogr -from yirgacheffe.layers import RasterLayer, VectorLayer, H3CellLayer -from yirgacheffe.window import PixelScale - -try: - COMMIT = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip() - if len(subprocess.check_output('git diff -q', shell=True)) != 0: - COMMIT += '*' -except subprocess.CalledProcessError: - COMMIT = 'unknown' - -# This regular expression is how we get the species ID from the filename -FILERE = re.compile(r'^Seasonality.(\w+)-(\d+).tif$') - -MAG = 7 - -LONG_BAND_WIDTH = 1.0 - -def threads() -> int: - return cpu_count() - -def geometry_to_pointlist(geo): - points = [] - for i in range(geo.GetPointCount()): - point = geo.GetPoint(i) - points.append((point[1], point[0])) - return points - -def geometry_to_polygons(geo, subdivide=True): - geotype = geo.GetGeometryType() - - if geotype == ogr.wkbMultiPolygon: - count = geo.GetGeometryCount() - polygons = [] # [None] * count - for i in range(count): - subgeometry = geo.GetGeometryRef(i) - subpolys = geometry_to_polygons(subgeometry) - assert len(subpolys) == 1 - polygon = subpolys[0] - - # envelope is (long_left, long_right, lat_bottom, lat_top) - envelope = subgeometry.GetEnvelope() - longitude_width = envelope[1] - envelope[0] - if (longitude_width < LONG_BAND_WIDTH) or not subdivide: - polygons.append(polygon) - continue - - # This poly is quite wide, so split it into smaller chunks - # OGR is quite slow (relative to the test of the work here) - # so we just do a simple lat banding - try: - slices = [] - for i in range(math.ceil(longitude_width / LONG_BAND_WIDTH)): - left = envelope[0] + (i * LONG_BAND_WIDTH) - right = envelope[0] + ((i + 1) * LONG_BAND_WIDTH) - frame = { - 'type': 'POLYGON', - 'coordinates': [ - [ - [left, envelope[3]], - [right, envelope[3]], - [right, envelope[2]], - [left, envelope[2]], - [left, envelope[3]], - ] - ] - } - band_geometry = ogr.CreateGeometryFromJson(json.dumps(frame)) - if band_geometry is None: - raise ValueError("Failed to create mask for slicing") - intersection = subgeometry.Intersection(band_geometry) - if intersection is None: - raise ValueError("Failed to create intersection") - if not intersection.IsEmpty(): - slices.append(intersection) - - for intersection in slices: - polygons += geometry_to_polygons(intersection, subdivide=False) - except ValueError: - # In rare cases it seems OGR doesn't like the original geometry for - # creating an intersection. I've seen errors like: - # - # ERROR 1: TopologyException: Input geom 0 is invalid: Ring Self-intersection at or near point... - # - # and the general advice I've seen is to keep fudging geometries until it - # works, which isn't a scalable solution. 
Instead we just take the hit and turn the entire - # polygon into hextiles in a single pass. - polygons.append(polygon) - - return polygons - - elif geotype == ogr.wkbPolygon: - points = [] - for i in range(geo.GetGeometryCount()): - points.append(geometry_to_pointlist(geo.GetGeometryRef(i))) - polygon = h3.Polygon(*points) # pylint: disable=E1120 - return [polygon] - - elif geotype == ogr.wkbGeometryCollection: - count = geo.GetGeometryCount() - polygons = [] - for i in range(count): - polygons += geometry_to_polygons(geo.GetGeometryRef(i), subdivide=False) - return polygons - - elif geotype == ogr.wkbPoint: - print(geo) - return [] - - else: - raise ValueError(f"unknown type {geotype}: {geo.GetGeometryName()}") - -def polygon_to_tiles(polygon): - - list_of_tiles = [] - - # First we get all the cells with a mid point within the polygon - try: - tiles = h3.polygon_to_cells(polygon, MAG) - list_of_tiles.append(tiles) - except MemoryError: - # It seems that in some rare cases we have generated very narrow slices as a result of the - # fragmenting we do, and that causes h3 to get super confused and run out of memory. This - # is most likely a bug in h3 but I can't say why currently, and don't have more time right - # now to dig in. Thankfully though, because of the second stage in this method where we - # expand the boundary of the polygon, this kinda fixes skipping the polygon_to_cells in - # all cases I've seen, so we at least have a temporary work around. - pass - - # now for every vertice on the polygon, work use the minimum distance path to approximate - # all cells on the boundary - polygons = [polygon.outer] + list(polygon.holes) - for loop in polygons: - if loop[0] != loop[-1]: - loop.append(loop[0]) - for i in range(len(loop) - 1): - start = loop[i] - end = loop[i + 1] - start_cell = h3.latlng_to_cell(*start, MAG) - end_cell = h3.latlng_to_cell(*end, MAG) - - line = [start_cell, end_cell] - - if start_cell != end_cell: - try: - distance_estimate = h3.grid_distance(start_cell, end_cell) - except Exception as exc: # pylint: disable=W0718 - # if the distance is too far then h3 will give up - # this is usually along the boundaries added by - # the chunking we do to let us parallelise things, and so - # we don't mind, as the polygon_to_cell is sufficient there - print(f'Failed to find path from {start} to {end}: {exc}') - continue - - # In an ideal world we'd use h3.grid_path_cells() for this, but in some places - # we observe that this does not take the direct route, and the docs do not - # say that it'll produce an optimal output, nor that the results are stable. 
- # Instead we do this approximation by hand, which isn't guaranteed to generate - # a contiguous line of cells, but is good enough, particularly once we add - # cell padding, which we did anyway even on the original implementation that - # had h3.grid_path_cells() - if distance_estimate: - diffs = ( - (end[0] - start[0]) / float(distance_estimate), - (end[1] - start[1]) / float(distance_estimate) - ) - for i in range(distance_estimate): - here = (start[0] + (diffs[0] * i), start[1] + (diffs[1] * i)) - cell = h3.latlng_to_cell(*here, MAG) - assert h3.is_valid_cell(cell) - line.append(cell) - else: - line = h3.grid_path_cells( - h3.latlng_to_cell(*start, MAG), - h3.latlng_to_cell(*end, MAG) - ) - - list_of_tiles.append(line) - for cell in line: - list_of_tiles.append(h3.grid_disk(cell, 3)) - - - tiles = itertools.chain.from_iterable(list_of_tiles) - - return tiles - -def process_cell(args): - aoh_layer_path, tile = args - - # Load the raster of total aoh of species - aoh_layer = RasterLayer.layer_from_file(aoh_layer_path) - - # create a layer the H3 cell of interest - tile_layer = H3CellLayer(tile, aoh_layer.pixel_scale, aoh_layer.projection) - - # calculate intersection - layers = [aoh_layer, tile_layer] - try: - intersection = RasterLayer.find_intersection(layers) - except ValueError: - return (tile, 0.0) - for layer in layers: - try: - layer.set_window_for_intersection(intersection) - except: - print(f'Failed to intersect {tile} with for {layer} with area {layer.area} and {intersection}') - raise - - # work out area of habitate contributed by just that cell - calc = aoh_layer * tile_layer - try: - tile_aoh = calc.sum() - except: - print(f' Failed to process {tile} with {intersection} at scale {aoh_layer.pixel_scale}') - raise - - return (tile, tile_aoh) - - -def tiles_to_area(aoh_layer_path, species_id, tiles, target_file, timestamp_2): - # we now have all the tiles, so work out the AoH in just that tile - results = [] - args = [(aoh_layer_path, tile) for tile in tiles] - - with Pool(processes=threads()) as pool: - results = pool.map(process_cell, args) - - timestamp_3 = time.time() - print(f"Processed {len(tiles)} tiles in {timestamp_3 - timestamp_2} seconds "\ - "- {float(len(tiles)) / (timestamp_3 - timestamp_2)} tiles per second") - - dataframe = pd.DataFrame(results, columns=['cell', 'area']) - table = pa.Table.from_pandas(dataframe).replace_schema_metadata({ - b'experiment': json.dumps({ - 'species': species_id, - 'source': aoh_layer_path, - 'user': os.environ['USER'], - 'timestamp': time.time(), - 'host': os.uname()[1], - 'src': __file__, - 'commit': COMMIT, - }).encode('utf8') - }) - pq.write_table(table, target_file, compression='GZIP') - - return dataframe.loc[:, 'area'].sum() - - -def get_original_aoh_info(raster_path: str) -> float: - aoh_layer = RasterLayer.layer_from_file(raster_path) - return aoh_layer.sum() - - -def get_range_polygons(range_path, species_id): - where_filter = f"id_no = {species_id} and season = 'resident'" - - # The pixel scale and projection don't matter here, as we're just - # abusing yirgacheffe to load the range file. 
Feels like I need to split this - # out at some point - layer = VectorLayer(range_path, where_filter, PixelScale(0.1, 0.1), "UNUSED") - range_layer = layer.layer - range_layer.ResetReading() - polygons = [] - feature = range_layer.GetNextFeature() - while feature: - geo = feature.GetGeometryRef() - polygons.append(geometry_to_polygons(geo, subdivide=True)) - feature = range_layer.GetNextFeature() - return list(itertools.chain.from_iterable(polygons)) - - -def main() -> None: - if len(sys.argv) != 5: - print(f'Usage: {sys.argv[0]} [AoH raster directory] [Range file] [Output directory] [Direction]') - sys.exit(1) - - current_rasters_dir = sys.argv[1] - range_file = sys.argv[2] - output_dir = sys.argv[3] - direction = sys.argv[4] - - print(direction) - try: - os.makedirs(output_dir, exist_ok=True) - except FileExistsError: - print(f'Could not create {output_dir} as file is there') - sys.exit(1) - except PermissionError: - print(f'Could not create {output_dir} due to permissions') - sys.exit(1) - - species_list = [FILERE.match(x).groups() for x in os.listdir(current_rasters_dir) if FILERE.match(x)] - - if direction == "forward": - print("Running H3 forward") - species_list.sort() - elif direction == "reverse": - print("Running H3 backwards") - species_list.sort(reverse=True) - - # for test run, just do first dozen - for season, species_id in species_list: - print(species_id, season) - - file_prefix = season.lower()[:3] - - # Due to errors as we find new corner cases, we keep having to restart the script - # so we don't overwrite old results and just keep moving on. - old_target_file = os.path.join(output_dir, f'{file_prefix}_{species_id}_{MAG}.csv') - target_file = os.path.join(output_dir, f'{file_prefix}_{species_id}_{MAG}.parquet') - if os.path.exists(target_file) or os.path.exists(old_target_file): - print('Species result exists, skipping') - continue - - start = time.time() - aoh_layer_path = os.path.join(current_rasters_dir, f'Seasonality.{season}-{species_id}.tif') - - # We can't currently parallelise either of these tasks, but they are independant, so we can - # at least run them concurrently... 
- try: - with Pool(processes=threads()) as pool: - res_aoh_total = pool.apply_async(get_original_aoh_info, (aoh_layer_path,)) - res_polygons = pool.apply_async(get_range_polygons, (range_file, species_id)) - - aoh_layer_total = res_aoh_total.get() - polygons = res_polygons.get() - except (FileNotFoundError, TypeError): - print(f'Failed to load raster for {species_id}, skipping') - continue - except ValueError: - print(f'Species {species_id} had bad range, skipping') - continue - - if aoh_layer_total == 0.0: - print(f'Skipping species, as AoH is {aoh_layer_total}') - continue - - timestamp_1 = time.time() - print(f"Found {len(polygons)} polygons in {timestamp_1 - start} seconds") - - # The h3 lookup can be ran concurrently thought - tiles = set() - with Pool(processes=threads()) as pool: - results = pool.map(polygon_to_tiles, polygons) - tiles = set(itertools.chain.from_iterable(results)) - - timestamp_2 = time.time() - print(f"Found {len(tiles)} tiles in {timestamp_2 - timestamp_1} seconds") - - total = tiles_to_area(aoh_layer_path, species_id, tiles, target_file, timestamp_2) - diff = ((total - aoh_layer_total) / aoh_layer_total) * 100.0 - if f'{abs(diff):.5f}' != '0.00000': - print(f'AoH layer total: {aoh_layer_total}') - print(f'Hex tile total: {total}') - print(f'Error is {diff:.5f}%') - - end = time.time() - print(f'{species_id} at mag {MAG} took {end - start} seconds') - -if __name__ == "__main__": - main() diff --git a/mollweide.py b/mollweide.py deleted file mode 100644 index c86a845..0000000 --- a/mollweide.py +++ /dev/null @@ -1,110 +0,0 @@ -import argparse -import os -import shutil -import tempfile - -import numpy as np -import pandas as pd -import h3 -from osgeo import gdal -from pyproj import CRS, Transformer - -TARGET_WIDTH=7267 -TARGET_HEIGHT=3385 - -EXTENT_MIN_X, EXTENT_MAX_X, EXTENT_MIN_Y, EXTENT_MAX_Y = -18027854.1353249, 18027101.8531421, -7965787.75894896, 8828766.53043604 # pylint: disable=C0301 - -PIXEL_SCALE_X = (EXTENT_MAX_X - EXTENT_MIN_X) / TARGET_WIDTH -PIXEL_SCALE_Y = (EXTENT_MAX_Y - EXTENT_MIN_Y) / TARGET_HEIGHT - -PROJ = """PROJCRS[\"Mollweide\",\n BASEGEOGCRS[\"WGS 84\",\n DATUM[\"D_unknown\",\n ELLIPSOID[\"WGS84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"Degree\",0.0174532925199433]]],\n CONVERSION[\"unnamed\",\n METHOD[\"Mollweide\"],\n PARAMETER[\"Longitude of natural origin\",0,\n ANGLEUNIT[\"Degree\",0.0174532925199433],\n ID[\"EPSG\",8802]],\n PARAMETER[\"False easting\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8806]],\n PARAMETER[\"False northing\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8807]]],\n CS[Cartesian,2],\n AXIS[\"(E)\",east,\n ORDER[1],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]],\n AXIS[\"(N)\",north,\n ORDER[2],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]]]""" # pylint: disable=C0301 - -def generate_mollweide( - tiles_csv_filename: str, - output_filename: str, -) -> None: - tiles_df = pd.read_csv(tiles_csv_filename) - - wgs85_crs = CRS.from_string("EPSG:4326") - mollweide_crs = CRS.from_string(PROJ) - transformer = Transformer.from_crs(wgs85_crs, mollweide_crs, always_xy=True) - - # work out the pixel scale - # x_scale = (transformer.transform(180, 0)[0] * 2.0) / TARGET_WIDTH - # y_scale = (transformer.transform(0, 90)[1] * 2.0) / TARGET_HEIGHT - x_scale = PIXEL_SCALE_X - y_scale = PIXEL_SCALE_Y - print(f"pixel scale: {x_scale}, {y_scale}") - - raw = np.zeros((TARGET_HEIGHT, TARGET_WIDTH)).tolist() - - with tempfile.TemporaryDirectory() as tempdir: - 
tempname = os.path.join(tempdir, "result.tif") - output_dataset = gdal.GetDriverByName("gtiff").Create( - tempname, - TARGET_WIDTH, - TARGET_HEIGHT, - 1, - gdal.GDT_Float64, - ['COMPRESS=LZW'], - ) - output_dataset.SetProjection(PROJ) - output_dataset.SetGeoTransform(( - EXTENT_MIN_X, x_scale, 0.0, - EXTENT_MIN_Y, 0.0, y_scale - )) - band = output_dataset.GetRasterBand(1) - - for _, row in tiles_df.iterrows(): - tileid, area = row - try: - lat, lng = h3.cell_to_latlng(tileid) - except ValueError: - print(f"Failed to process {tileid}") - continue - x_mollweide, y_mollweide = transformer.transform(lng, lat) # pylint: disable=E0633 - x_mollweide -= EXTENT_MAX_X - y_mollweide -= EXTENT_MIN_Y - - xpos = round((x_mollweide / x_scale)) - ypos = round((y_mollweide / y_scale)) - val = raw[ypos][xpos] - if val == 0: - val = [area] - else: - val.append(area) - raw[ypos][xpos] = val - - # Now we need to average all the cells - for yoffset in range(TARGET_HEIGHT): - for xoffset in range(TARGET_WIDTH): - val = raw[yoffset][xoffset] - raw[yoffset][xoffset] = np.mean(val) - - band.WriteArray(np.array(raw), 0, 0) - del output_dataset - - shutil.move(tempname, output_filename) - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument( - "--tiles", - type=str, - required=True, - dest="tiles_csv_filename", - help="CSV containing h3 tiles and values." - ) - parser.add_argument( - "--output", - type=str, - required=True, - dest="output_filename", - help="Filename for output GeoTIFF." - ) - args = parser.parse_args() - - generate_mollweide(args.tiles_csv_filename, args.output_filename) - -if __name__ == "__main__": - main() From f4c0e759bb07e48f5cb08262797723d9345ee40f Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 6 Sep 2024 10:40:54 +0100 Subject: [PATCH 02/36] Add new AoH calculator code as submodule --- .gitmodules | 3 +++ aoh-calculator | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 aoh-calculator diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..30fd8a6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "aoh-calculator"] + path = aoh-calculator + url = git@github.com:quantifyearth/aoh-calculator.git diff --git a/aoh-calculator b/aoh-calculator new file mode 160000 index 0000000..a920bbc --- /dev/null +++ b/aoh-calculator @@ -0,0 +1 @@ +Subproject commit a920bbc95857efee9c06f691cd07fd098df89dc3 From 06a97f0728285499ca1061635e27b2bda67d7618 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 9 Sep 2024 10:27:26 +0100 Subject: [PATCH 03/36] Get AoH flow working --- aohcalc.py | 162 ----------------------- flow.yml | 275 ---------------------------------------- method.md | 250 ++++++++++++++++++++++++++++++++++++ persistence/__init__.py | 252 ------------------------------------ requirements.txt | 14 +- 5 files changed, 259 insertions(+), 694 deletions(-) delete mode 100644 aohcalc.py delete mode 100644 flow.yml create mode 100644 method.md delete mode 100644 persistence/__init__.py diff --git a/aohcalc.py b/aohcalc.py deleted file mode 100644 index 2c37cf4..0000000 --- a/aohcalc.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import json -import os -import sys - - -import cProfile -import pstats - -from iucn_modlib.factories import TaxonFactories - -import persistence - - -parser = argparse.ArgumentParser(description="Area of habitat calculator.") -parser.add_argument( - '--taxid', - type=int, - help="animal taxonomy id", - required=True, - dest="species" -) 
-parser.add_argument( - '--seasonality', - type=str, - help="which season to calculate for (breeding, nonbreeding, or resident)", - required=True, - dest="seasonality" -) -parser.add_argument( - '--experiment', - type=str, - help="name of experiment group from configuration json", - required=True, - dest="experiment" -) -parser.add_argument( - '--config', - type=str, - help="path of configuration json", - required=False, - dest="config_path", - default="config.json" -) -parser.add_argument( - '--geotiffs', - type=str, - help='directory where area geotiffs should be stored', - required=False, - dest='results_path', - default=None, -) -parser.add_argument( - '--nogpu', - type=str, - help='disable CUDA usage', - required=False, - dest='nogpu', - default='True', -) -parser.add_argument( - '--profile', - type=bool, - help='enable profiling', - required=False, - dest='profile', - default=False, -) -args = vars(parser.parse_args()) - -if args['nogpu'].lower() in ['t', 'true']: - persistence.USE_GPU = False - -try: - seasonality = persistence.Seasonality(args['seasonality']) -except ValueError: - print(f'Seasonality {args["seasonality"]} is not valid') - sys.exit(-1) - -try: - with open(args['config_path'], 'r', encoding='utf-8') as config_file: - config = json.load(config_file) -except FileNotFoundError: - print(f'Failed to find configuration json file {args["config_path"]}') - sys.exit(-1) -except json.decoder.JSONDecodeError as e: - print(f'Failed to parse {args["config_path"]} at line {e.lineno}, column {e.colno}: {e.msg}') - sys.exit(-1) - -try: - experiment = config['experiments'][args['experiment']] -except KeyError: - if not 'experiments' in config: - print("No experiments section founnd in configuration json") - else: - print(f'Failed to find experiment with name {args["experiment"]}. Options found:') - for experiment in config['experiments']: - print(f'\t{experiment}') - sys.exit(-1) - -if 'iucn_batch' in experiment: - batch = TaxonFactories.loadBatchSource(experiment['iucn_batch']) - species = TaxonFactories.TaxonFactoryRedListBatch(args['species'], batch) -else: - try: - species = TaxonFactories.TaxonFactoryRedListAPI(args['species'], config['iucn']['api_key']) - except KeyError: - print("Failed to find IUCN API key in config file or batch path in experiment.") - sys.exit(-1) - -try: - translator = experiment['translator'] - if translator == 'jung': - TranslatorType = persistence.JungModel - elif translator == 'esacci': - TranslatorType = persistence.ESACCIModel - else: - print(f'Translator type of "{translator}" not recognised. 
Expected "jung" or "esacci".') - sys.exit(-1) -except KeyError: - print(f'Experiment {args["experiment"]} is missing a translator key') - sys.exit(-1) - -try: - land = TranslatorType( - experiment['habitat'], - experiment['elevation'], - experiment['area'] - ) -except KeyError: - print(f'Experiment "{args["experiment"]}" was missing one or more of the map keys: "habitat", "elevation", "area".') - sys.exit(-1) - -try: - range_path = experiment['range'] -except KeyError: - print(f'Experiment "{args["experiment"]}" was missing range key.') - -if args['results_path']: - if not os.path.isdir(args['results_path']): - print(f'Provided results path {args["results_path"]} is not a directory') - sys.exit(-1) - -# pylint: disable=C0103 -profiler = None -if args['profile']: - profiler = cProfile.Profile() - profiler.enable() - -try: - result = persistence.calculator(species, range_path, land, seasonality, args['results_path']) -except KeyboardInterrupt: - pass - -if profiler is not None: - profiler.disable() - p = pstats.Stats(profiler) - p.sort_stats(pstats.SortKey.TIME).print_stats(20) - -print(', '.join([str(x) for x in result])) diff --git a/flow.yml b/flow.yml deleted file mode 100644 index c56852d..0000000 --- a/flow.yml +++ /dev/null @@ -1,275 +0,0 @@ -IUCN-download: - type: "download" - author: - - ae491 - synopsis: "Download IUCN data from website" - outputs: - - IUCN raw data - -IUCN-importer: - type: "program" - author: - - mb - inputs: - - IUCN raw data - synopsis: "Takes raw IUCN bulk download and turns it to gpkg" - outputs: - - IUCN refined data - -IUCN-filter: - type: "program" - synopsis: "filter the cleaned up IUCN data for the actual experiment" - author: - - ae491 - - mwd24 - inputs: - - IUCN refined data - outputs: - - IUCN experiment data - -"habitat": - type: "group" - children: - habitat-download-current: - type: "download" - author: - - ae491 - ninputs: - - experiment-config - outputs: - - habitat-map-current - - habitat-download-historic: - type: "download" - author: - - ae491 - ninputs: - - experiment-config - outputs: - - habitat-map-historic - - habitat-generate-restore: - type: "program" - author: - - ae491 - inputs: - - habitat-map-current - outputs: - - habitat-map-restore - - habitat-generate-arable: - type: "program" - author: - - ae491 - inputs: - - habitat-map-current - outputs: - - habitat-map-arable - -area-download: - type: "download" - author: - - ae491 - outputs: - - area-file - -area-data-optimize: - type: "program" - author: - - mwd24 - inputs: - - area-file - outputs: - - tiny-area-file - -elevation-download: - type: "download" - author: - - ae491 - outputs: - - elevation-map - - - -"AoH calc": - type: "group" - children: - - speciesgenerator.py: - type: "program" - author: - - mwd24 - synopsis: "Generates the species/seasonality list per project." 
- inputs: - - IUCN experiment data - outputs: - - species-season-job-list - - "aohcalc.py current_L1L2": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-current - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH-raster current_L1L2*" - - "aohcalc.py PNV": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-historic - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH PNV CSV" - - "aohcalc.py arable": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-arable - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH-raster arable*" - - "aohcalc.py restore": - type: "littlejohn" - name: "aohcalc.py" - author: - - mwd24 - synopsis: "Generates raster of area oh habitat per species/seasonality" - inputs: - - elevation-map - - habitat-map-restore - - IUCN experiment data - - tiny-area-file - - species-season-job-list - outputs: - - "AoH-raster restore*" - - - "downsample.py current_L1L2": - type: "program" - name: "downsample.py" - author: - - mwd24 - synopsis: "Takes the Jung scale rasters and reduces them to 1 arc minute" - inputs: - - "AoH-raster current_L1L2" - outputs: - - "AoH-small-raster current_L1L2*" - - "downsample.py restore": - type: "program" - name: "downsample.py" - author: - - mwd24 - synopsis: "Takes the Jung scale rasters and reduces them to 1 arc minute" - inputs: - - "AoH-raster restore" - outputs: - - "AoH-small-raster restore*" - - "downsample.py arable": - type: "program" - name: "downsample.py" - author: - - mwd24 - synopsis: "Takes the Jung scale rasters and reduces them to 1 arc minute" - inputs: - - "AoH-raster arable" - outputs: - - "AoH-small-raster arable*" - - - -"final": - type: "groups" - names: - - "z=0.1" - - "z=0.25" - - "z=0.5" - - "z=1.0" - - "z=gompertz" - children: - "gcrgen.py arable": - type: "program" - name: "gcrgen.py" - author: - - tsb42 - inputs: - - "AoH-small-raster arable" - outputs: - - deltap-job-list-arable - - "gcrgen.py restore": - type: "program" - name: "gcrgen.py" - author: - - tsb42 - inputs: - - "AoH-small-raster restore" - outputs: - - deltap-job-list-restore - - "generate-difference-tifs arable": - type: "littlejohn" - author: - - mwd24 - - ae491 - - tsb42 - inputs: - - deltap-job-list-arable - - "AoH-small-raster arable" - - "AoH-small-raster current_L1L2" - - "AoH PNV CSV" - outputs: - - "per-species-delta-p-tif arable*" - - "generate-difference-tifs restore": - type: "littlejohn" - author: - - mwd24 - - ae491 - - tsb42 - inputs: - - deltap-job-list-restore - - "AoH-small-raster restore" - - "AoH-small-raster current_L1L2" - - "AoH PNV CSV" - outputs: - - "per-species-delta-p-tif restore*" - - "generate-deltap-per-taxa": - type: "program" - synopsis: "aka raster_sum.py" - inputs: - - "per-species-delta-p-tif arable" - outputs: - - delta-p-per-taxa-arable.tif - - "generate-deltap-per-taxa": - type: "program" - synopsis: "aka raster_sum.py" - inputs: - - "per-species-delta-p-tif restore" - outputs: - - delta-p-per-taxa-restore.tif diff --git a/method.md b/method.md new file mode 100644 index 0000000..7a47b81 --- /dev/null +++ b/method.md @@ 
-0,0 +1,250 @@
+---
+path: /root
+---
+# How to run the pipeline for LIFE
+
+## Build the environment
+
+
+### The geospatial compute container
+
+The Dockerfile that comes with the repo should be used to run the pipeline.
+
+```
+docker buildx build --tag aohbuilder .
+```
+
+For use with the [shark pipeline](https://github.com/quantifyearth/shark), we currently need this block to trigger a build:
+
+```shark-build:aohbuilder
+((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5)
+ (run (network host) (shell "apt-get update -qqy && apt-get -y install python3-pip libpq-dev git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*"))
+ (run (network host) (shell "pip install --upgrade pip"))
+ (run (network host) (shell "pip install 'numpy<2'"))
+ (run (network host) (shell "pip install gdal[numpy]==3.8.5"))
+ (run (shell "mkdir -p /root"))
+ (workdir "/root")
+ (copy (src "requirements.txt") (dst "./"))
+ (run (network host) (shell "pip install --no-cache-dir -r requirements.txt"))
+ (copy (src "prepare-layers") (dst "./"))
+ (copy (src "prepare-species") (dst "./"))
+ (copy (src "aoh-calculator") (dst "./"))
+ (copy (src "deltap") (dst "./"))
+)
+```
+
+We fetch the primary data sources directly from Zenodo/GitHub so that their provenance is obvious.
+
+```shark-build:reclaimer
+((from carboncredits/reclaimer:latest))
+```
+
+For the projection changes we use a barebones GDAL container. The reason for this is that these operations are expensive, and we don't want to re-execute them if we update our code.
+
+```shark-build:gdalonly
+((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5))
+```
+
+Alternatively, you can build your own Python virtual environment, assuming you have everything required. For this you will need at least GDAL installed locally, and you may want to update requirements.txt so that the Python GDAL bindings match the version you have installed.
+
+```
+python3 -m virtualenv ./venv
+. ./venv/bin/activate
+pip install -r requirements.txt
+```
+
+### The PostGIS container
+
+For querying the IUCN data held in the PostGIS database we use a separate container, based on the standard PostGIS image.
+
+```shark-build:postgis
+((from python:3.12-slim)
+ (run (network host) (shell "apt-get update -qqy && apt-get -y install libpq-dev gcc git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*"))
+ (run (network host) (shell "pip install psycopg2 SQLalchemy geopandas"))
+ (run (network host) (shell "pip install git+https://github.com/quantifyearth/pyshark"))
+ (copy (src "./") (dst "/root/"))
+ (workdir "/root/")
+ (run (shell "chmod 755 *.py"))
+)
+```
+
+## Fetching the required data
+
+To calculate the AoH we need various basemaps:
+
+* Habitat maps for four scenarios:
+  * Current day, in both L1 and L2 IUCN habitat classification
+  * Potential Natural Vegetation (PNV), showing the habitats predicted without human intervention
+  * Restore scenario - a map derived from the PNV and current maps showing certain lands restored to their pre-human state
+  * Conserve scenario - a map derived from the current map indicating the impact of placing arable land
+* The Digital Elevation Map (DEM), which gives the height of each pixel in meters
+
+All these maps must be at the same pixel spacing and projection, and the output AoH maps will be at that same pixel resolution and projection.
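+
+Once the layers below have been generated, a quick sanity check that they do all line up is worth running before starting any of the expensive steps. A minimal sketch using GDAL's Python bindings (the paths are illustrative; list whichever basemaps you have prepared):
+
+```
+from osgeo import gdal
+
+# Illustrative paths: any of the basemaps prepared below.
+paths = ["/data/habitat/current_raw.tif", "/data/elevation-min-1k.tif"]
+
+datasets = [gdal.Open(p) for p in paths]
+# Projection as WKT, and the pixel scale from the geotransform.
+projections = {d.GetProjection() for d in datasets}
+scales = {(d.GetGeoTransform()[1], d.GetGeoTransform()[5]) for d in datasets}
+
+assert len(projections) == 1, "projections differ"
+assert len(scales) == 1, "pixel scales differ"
+```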
+
+Habitat maps typically store habitat classes as integer values, while the IUCN habitat codes used in the species data are of the form 'x.y' or 'x.y.z', so you will also need a crosswalk table that maps between the IUCN habitat codes and the values used in the particular habitat map.
+
+### Fetching the habitat maps
+
+LIFE uses the work of Jung et al to get both the [current day habitat map](https://zenodo.org/records/4058819) and the [PNV habitat map](https://zenodo.org/records/4038749).
+
+To assist with provenance, we download the data from the Zenodo ID.
+
+```shark-run:reclaimer
+reclaimer zenodo --zenodo_id 4038749 \
+    --filename pnv_lvl1_004.zip \
+    --extract \
+    --output /data/habitat/pnv_raw.tif
+reclaimer zenodo --zenodo_id 4058819 \
+    --filename iucn_habitatclassification_composite_lvl2_ver004.zip \
+    --extract \
+    --output /data/habitat/jung_l2_raw.tif
+```
+
+For LIFE the crosswalk table is generated using code from Daniele Baisero's [IUCN Modlib](https://gitlab.com/daniele.baisero/iucn-modlib/) package:
+
+```shark-run:aohbuilder
+python3 ./prepare-layers/generate_crosswalk.py --output /data/crosswalk.csv
+```
+
+The PNV map is only classified at Level 1 of the IUCN habitat codes, and so to match this, non-artificial habitats in the L2 map are converted to their Level 1 class, as per Eyres et al:
+
+| The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped only at IUCN level 1, so to estimate species’ proportion of original AOH now remaining we could only use natural habitats mapped at level 1 and artificial habitats at level 2.
+
+```shark-run:aohbuilder
+python3 ./prepare-layers/make_current_map.py --jung_l2 /data/habitat/jung_l2_raw.tif \
+    --crosswalk /data/crosswalk.csv \
+    --output /data/habitat/current_raw.tif \
+    -j 16
+```
+
+The habitat map by Jung et al is at 100m resolution in the World Behrmann projection, but for IUCN-compatible AoH maps we use Mollweide at 1km resolution, so we use GDAL to do the resampling:
+
+```shark-run:aohbuilder
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/pnv_raw.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/pnv/
+```
+
+```shark-run:aohbuilder
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/current_raw.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/current/
+```
+
+
+### Generating additional habitat maps
+
+From [Eyres et al]():
+
+For the restoration map:
+
+| In the restoration scenario all areas classified as arable or pasture were restored to their PNV.
+
+```shark-run:aohbuilder
+python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \
+    --current /data/habitat/current_raw.tif \
+    --crosswalk /data/crosswalk.csv \
+    --output /data/habitat/restore.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/restore.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/restore/
+```
+
+For the conservation map:
+
+| In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land.
+
+```shark-run:aohbuilder
+python3 ./prepare-layers/make_arable_map.py --current /data/habitat/current_raw.tif \
+    --crosswalk /data/crosswalk.csv \
+    --output /data/habitat/arable.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/arable.tif \
+    --scale 0.016666666666667 \
+    --output /data/habitat_maps/arable/
+```
+
+
+### Fetching the elevation map
+
+To assist with provenance, we download the data from the Zenodo ID.
+
+```shark-run:reclaimer
+reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output /data/elevation.tif
+```
+
+As with the habitat map, we need to resample to 1km; however, rather than picking the mean elevation, we select both the minimum and maximum elevation for each pixel, and then check whether the species' elevation range overlaps that band when we calculate the AoH.
+
+```shark-run:gdalonly
+gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 /data/elevation.tif /data/elevation-min-1k.tif
+gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 /data/elevation.tif /data/elevation-max-1k.tif
+```
+
+
+## Calculating AoH
+
+Once all the data has been collected, we can now calculate the AoH maps.
+
+### Get per species range data
+
+Rather than calculating from the PostGIS database directly, we first split out the data into a single GeoJSON file per species per season:
+
+```shark-run:postgis
+export DB_HOST=somehost
+export DB_USER=username
+export DB_PASSWORD=secretpassword
+export DB_NAME=iucnredlist
+
+python3 ./prepare-species/extract_species_psql.py --output /data/species-info/ --projection "EPSG:4326"
+```
+
+The reason for doing this is primarily pipeline optimisation, though it also makes debugging and provenance tracing much easier. Most build systems, including the one we use, notice when files have been updated and only redo the work that depends on them. If we have many thousands of species on the Red List and only a few of them change, then basing the calculation on a single file containing all species would force us to recalculate every result. With this step added, we regenerate the per-species, per-season GeoJSON files, which is cheap, and can then spot that most of them are unchanged and skip recalculating those rasters in the next stage.
+
+
+### Calculate AoH
+
+This step generates a single AoH raster for a single one of the above GeoJSON files.
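+
+Conceptually, a pixel counts towards a species' AoH when it falls inside the rasterised range polygon, its habitat class is in the species' suitable set from the crosswalk, and the species' elevation band overlaps the pixel's min/max elevation band. A minimal sketch of that per-pixel test, on arrays already aligned to a common window (illustrative only, not the aoh-calculator API):
+
+```
+import numpy as np
+
+def aoh_mask(habitat, elev_min, elev_max, species_range, suitable_codes, lower, upper):
+    """First four arguments are aligned 2D arrays; the rest are per-species parameters."""
+    suitable = np.isin(habitat, suitable_codes)
+    # The species band [lower, upper] must overlap the pixel band [elev_min, elev_max].
+    in_range = (elev_max >= lower) & (elev_min <= upper)
+    return suitable & in_range & species_range
+```
+
+Multiplying such a mask by a per-pixel area raster and summing gives the AoH figure itself. The actual invocations, one per scenario, are: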
+ +```shark-run:aohbuilder +python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/current/ \ + --elevation-max /data/elevation-max-1k.tif \ + --elevation-min /data/elevation-min-1k.tif \ + --crosswalk /data/crosswalk.csv \ + --speciesdata /data/species-info/current/* \ + --output /data/aohs/current/ + +python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/restore/ \ + --elevation-max /data/elevation-max-1k.tif \ + --elevation-min /data/elevation-min-1k.tif \ + --crosswalk /data/crosswalk.csv \ + --speciesdata /data/species-info/current/* \ + --output /data/aohs/restore/ + +python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/arable/ \ + --elevation-max /data/elevation-max-1k.tif \ + --elevation-min /data/elevation-min-1k.tif \ + --crosswalk /data/crosswalk.csv \ + --speciesdata /data/species-info/current/* \ + --output /data/aohs/arable/ + +python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/pnv/ \ + --elevation-max /data/elevation-max-1k.tif \ + --elevation-min /data/elevation-min-1k.tif \ + --crosswalk /data/crosswalk.csv \ + --speciesdata /data/species-info/historic/* \ + --output /data/aohs/pnv/ +``` + +The results you then want will all be in: + +```shark-publish +/data/aohs/current/ +/data/aohs/restore/ +/data/aohs/arable/ +/data/aohs/pnv/ +``` + + +## Calculating persistence maps + diff --git a/persistence/__init__.py b/persistence/__init__.py deleted file mode 100644 index 39fe890..0000000 --- a/persistence/__init__.py +++ /dev/null @@ -1,252 +0,0 @@ -# AoH calculator code for the 4C persistence calculator, a more specialised -# version of the logic from working on Daniele Baisero's AoH library. -# -# There's two seperate versions of the actual calculation - one for CPU use -# one for use with CUDA GPUs. Originally I wanted to have one function with -# conditional bits of code, but almost all the code ended up being conditional -# one way or the other, so the logic was hard to read. So instead we now have -# some duplication, but it is easier to see the logic in each one. - -import os -import shutil -import tempfile -from dataclasses import dataclass -from enum import Enum -from typing import List, Optional, Any, Tuple - -import numpy -from osgeo import gdal -try: - import cupy - import cupyx - USE_GPU = True -except ModuleNotFoundError: - USE_GPU = False - -from iucn_modlib.classes.Taxon import Taxon -from iucn_modlib.classes.HabitatFilters import HabitatFilters -import iucn_modlib.translator - -from yirgacheffe.layers import RasterLayer, VectorLayer, ConstantLayer, UniformAreaLayer, YirgacheffeLayer - -# When working with rasters we read larger chunks that just a single line, despite that usually -# being what GDAL recommends if you ask for the efficient block size for larger files. There's -# two reasons for this: -# 1: We use DynamicVectorRangeLayer to incrementally rasterize the vector habitat maps, so as to -# not need to hold the entire raster in memory at once. Doing that on a per line basis is -# somewhat slow. Thus the step is a tradeoff between memory allocation and CPU cost of -# processing the vectors. Moving from 1 line to 512 lines cut the runtime by close to half for -# the small sample I tested. -# 2: With the CUDA version of the calculator you have a cost of moving the data from main memory -# over to GPU memory and back. Again, doing so on a line by line basis is inefficient, and using -# a larger chunk size gives us better efficiency. 
-YSTEP = 512 - - -@dataclass -class LandModel: - habitat_map_filename: str - elevation_map_filename: str - area_map_filename: Optional[str] - translator: Any - - def new_habitat_layer(self) -> RasterLayer: - return RasterLayer.layer_from_file(self.habitat_map_filename) - - def new_elevation_layer(self) -> RasterLayer: - return RasterLayer.layer_from_file(self.elevation_map_filename) - - def new_area_layer(self) -> YirgacheffeLayer: - if self.area_map_filename is None: - return ConstantLayer(1.0) - try: - return UniformAreaLayer.layer_from_file(self.area_map_filename) - except ValueError: - return RasterLayer.layer_from_file(self.area_map_filename) - -class JungModel(LandModel): - def __init__(self, habitat_map_filename: str, elevation_map_filename: str, area_map_filename: Optional[str] = None): - super().__init__(habitat_map_filename, elevation_map_filename, area_map_filename, iucn_modlib.translator.toJung) - -class ESACCIModel(LandModel): - def __init__(self, habitat_map_filename: str, elevation_map_filename: str, area_map_filename: Optional[str] = None): - super().__init__(habitat_map_filename, elevation_map_filename, area_map_filename, - iucn_modlib.translator.toESACCI) - - -class Seasonality(Enum): - RESIDENT = "resident" - BREEDING = "breeding" - NONBREEDING = "nonbreeding" - - @property - def iucn_seasons(self) -> Tuple: - if self.value == 'resident': - return ('Resident', 'Seasonal Occurrence Unknown') - elif self.value == 'breeding': - return ('Resident', 'Breeding Season', 'Seasonal Occurrence Unknown') - elif self.value == 'nonbreeding': - return ('Resident', 'Non-Breeding Season', 'Seasonal Occurrence Unknown') - else: - raise NotImplementedError(f'Unhandled seasonlity value {self.value}') - - -def calculator( - species: Taxon, - range_path: str, - land_model: LandModel, - seasonality: Seasonality, - results_path: Optional[str] -) -> Tuple[float, Optional[str]]: - - # We do not re-use data in this, so set a small block cache size for GDAL, otherwise - # it pointlessly hogs memory, and then spends a long time tidying it up after. 
- gdal.SetCacheMax(1024 * 1024 * 16) - - habitat_params = iucn_modlib.HabitatFilters( - season = seasonality.iucn_seasons, - suitability = ('Suitable', 'Unknown'), - majorImportance = ('Yes', 'No'), - ) - habitat_list = land_model.translator(species.habitatCodes(habitat_params)) - - # These three map layers don't change across seasons - habitat_layer = land_model.new_habitat_layer() - elevation_layer = land_model.new_elevation_layer() - area_layer = land_model.new_area_layer() - - # range layer is only one that is seasonal, so recalculate - where_filter = f"id_no = {species.taxonid} and season in ('{seasonality.value}', 'resident')" - pixel_scale = habitat_layer.pixel_scale - assert pixel_scale - try: - range_layer = VectorLayer.layer_from_file(range_path, where_filter, pixel_scale, habitat_layer.projection) - except ValueError: - return 0.0, None - - # Work out the intersection of all the maps - layers = [habitat_layer, elevation_layer, area_layer, range_layer] - try: - intersection = YirgacheffeLayer.find_intersection(layers) - except ValueError: - for layer in layers: - print(f'Scale of {layer} is {layer.pixel_scale}') - raise - for layer in layers: - layer.set_window_for_intersection(intersection) - - with tempfile.TemporaryDirectory() as tempdir: - results_layer = None - results_dataset_filename = '' - if results_path: - results_dataset_filename = f'{seasonality}-{species.taxonid}.tif' - results_layer = RasterLayer.empty_raster_layer_like( - habitat_layer, - os.path.join(tempdir, results_dataset_filename), - datatype=gdal.GDT_Float32, - ) - - calculate_function = _calculate_cpu if not USE_GPU else _calculate_cuda - result = calculate_function( - range_layer, - habitat_layer, - habitat_list, - elevation_layer, - (species.elevation_lower, species.elevation_upper), - area_layer, - results_layer, - ) - # if we got here, then consider the experiment a success - if results_layer and results_path: - del results_layer # aka close for gdal - shutil.move(os.path.join(tempdir, results_dataset_filename), - os.path.join(results_path, results_dataset_filename)) - return result, results_dataset_filename - - -def _calculate_cpu( - range_layer: YirgacheffeLayer, - habitat_layer: YirgacheffeLayer, - habitat_list: List, - elevation_layer: YirgacheffeLayer, - elevation_range: Tuple[float, float], - area_layer: YirgacheffeLayer, - results_layer: Optional[YirgacheffeLayer] -) -> float: - filtered_habitat = habitat_layer.numpy_apply(lambda chunk: numpy.isin(chunk, habitat_list)) - filtered_elevation = elevation_layer.numpy_apply(lambda chunk: - numpy.logical_and(chunk >= min(elevation_range), chunk <= max(elevation_range))) - - # TODO: this isn't free - so if there's no nan's we'd like to avoid this stage - #cleaned_area = area_layer.numpy_apply(lambda chunk: numpy.nan_to_num(chunk, copy=False, nan=0.0)) - - data = filtered_habitat * filtered_elevation * area_layer * range_layer - if results_layer: - return data.save(results_layer, and_sum=True) - else: - return data.sum() - - -def _calculate_cuda( - range_layer: YirgacheffeLayer, - habitat_layer: YirgacheffeLayer, - habitat_list: List, - elevation_layer: YirgacheffeLayer, - elevation_range: Tuple[float, float], - area_layer: YirgacheffeLayer, - results_layer: Optional[YirgacheffeLayer] -) -> float: - - # all layers now have the same window width/height, so just take the habitat one - pixel_width = habitat_layer.window.xsize - pixel_height = habitat_layer.window.ysize - - aoh_shader = cupy.ElementwiseKernel( - 'bool habitat, int16 elevation, uint8 
species_range, float64 pixel_area', - 'float64 result', - 'result = (species_range && habitat && ' \ - f'((elevation >= {min(elevation_range)}) && (elevation <= {max(elevation_range)})));' \ - 'result = result * pixel_area', - 'my_shader' - ) - aoh_reduction_shader = cupy.ReductionKernel( - 'bool habitat, int16 elevation, uint8 species_range, float64 pixel_area', - 'float64 result', - f'(species_range && habitat && ((elevation >= {min(elevation_range)}) && ' \ - f'(elevation <= {max(elevation_range)}))) * pixel_area', - 'a + b', - 'result = a', - '0.0', - 'my_reduction_shader' - ) - - habitat_list = cupy.array(habitat_list) - - area_total = 0.0 - data = None - for yoffset in range(0, pixel_height, YSTEP): - this_step = YSTEP - if yoffset + this_step > pixel_height: - this_step = pixel_height - yoffset - - habitat, elevation, species_range, pixel_areas = [ - cupy.array(x.read_array(0, yoffset, pixel_width, this_step)) - for x in [habitat_layer, elevation_layer, range_layer, area_layer] - ] - - filtered_habitat = cupy.isin(habitat, habitat_list) - - # if we don't need to store out the geotiff then we can do - # the summation and sum in a single reduction shader. Otherwise we need to - # calc to an area and then reduce, which is slower but is the price of - # getting the intermediary data - if not results_layer: - area_total += aoh_reduction_shader(filtered_habitat, elevation, species_range, pixel_areas) - else: - if data is None or data.shape != filtered_habitat.shape: - data = cupy.zeros(filtered_habitat.shape, cupy.float64) - aoh_shader(filtered_habitat, elevation, species_range, pixel_areas, data) - area_total += cupy.sum(data) - results_layer._dataset.GetRasterBand(1).WriteArray(data.get(), 0, yoffset) # pylint: disable=W0212 - - return area_total diff --git a/requirements.txt b/requirements.txt index 9e8f6a8..57a630e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,16 @@ -requests -numpy -gdal[numpy]<3.7 +geopandas +numpy<2 pyarrow pandas -geopandas -h3==4.0.0b1 +psutil scipy +pyproj +scikit-image rasterio +requests +alive-progress + +gdal[numpy] git+https://github.com/carboncredits/iucn_modlib.git git+https://github.com/carboncredits/yirgacheffe From cdae77014f54e6edd176412c175048b08f42dfdc Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 9 Sep 2024 10:34:45 +0100 Subject: [PATCH 04/36] Tidy top level --- tiles2tiff.py | 65 ------------------- downsample.py => utils/downsample.py | 0 raster_sum.py => utils/raster_sum.py | 0 readmeta.py => utils/readmeta.py | 0 .../speciesgenerator.py | 0 unionsum.py => utils/unionsum.py | 0 vt316generator.py => utils/vt316generator.py | 0 7 files changed, 65 deletions(-) delete mode 100644 tiles2tiff.py rename downsample.py => utils/downsample.py (100%) rename raster_sum.py => utils/raster_sum.py (100%) rename readmeta.py => utils/readmeta.py (100%) rename speciesgenerator.py => utils/speciesgenerator.py (100%) rename unionsum.py => utils/unionsum.py (100%) rename vt316generator.py => utils/vt316generator.py (100%) diff --git a/tiles2tiff.py b/tiles2tiff.py deleted file mode 100644 index 79fe337..0000000 --- a/tiles2tiff.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys - -from osgeo import gdal -import pandas as pd - -from yirgacheffe import WSG_84_PROJECTION -from yirgacheffe.window import Area, PixelScale -from yirgacheffe.layers import RasterLayer, H3CellLayer, YirgacheffeLayer - -def main() -> None: - if len(sys.argv) != 3: - print(f'USAGE: {sys.argv[0]} CSV TIF') - sys.exit(-1) - filename = 
sys.argv[1] - - # Make up the geo transform based on image resolution - width, height = 3840.0, 2180.0 # 4K screen - scale = PixelScale(360.0 / width, -180.0/height) - area = Area(left=-180.0, right=180, top=90, bottom=-90) - - ext = os.path.splitext(filename)[1] - if ext == '.parquet': - tiles_df = pd.read_parquet(filename) - elif ext == '.csv': - tiles_df = pd.read_csv(filename, index_col=False) - elif ext == '.hdf5': - tiles_df = pd.read_hdf(filename) - else: - print(f'unrecognised data type {ext}') - sys.exit(-1) - - # Every time you write to a gdal layer that has a file store you - # risk it trying to save the compressed file, which is slow. So - # we first use a memory only raster layer, and then at the end save - # the result we built up out to file. - scratch = RasterLayer.empty_raster_layer(area, scale, gdal.GDT_Float64) - - for _, tile, area in tiles_df.itertuples(): - if area == 0.0: - continue - try: - tile_layer = H3CellLayer(tile, scale, WSG_84_PROJECTION) - except ValueError: - print(f"Skipping tile with invalid id: {tile}") - continue - - scratch.reset_window() - layers = [scratch, tile_layer, scratch] - intersection = YirgacheffeLayer.find_intersection(layers) - for layer in layers: - layer.set_window_for_intersection(intersection) - - result = scratch + (tile_layer * area) - result.save(scratch) - - # now we've done the calc in memory, save it to a file - scratch.reset_window() - output = RasterLayer.empty_raster_layer_like(scratch, filename=sys.argv[2]) - scratch.save(output) - -if __name__ == "__main__": - main() diff --git a/downsample.py b/utils/downsample.py similarity index 100% rename from downsample.py rename to utils/downsample.py diff --git a/raster_sum.py b/utils/raster_sum.py similarity index 100% rename from raster_sum.py rename to utils/raster_sum.py diff --git a/readmeta.py b/utils/readmeta.py similarity index 100% rename from readmeta.py rename to utils/readmeta.py diff --git a/speciesgenerator.py b/utils/speciesgenerator.py similarity index 100% rename from speciesgenerator.py rename to utils/speciesgenerator.py diff --git a/unionsum.py b/utils/unionsum.py similarity index 100% rename from unionsum.py rename to utils/unionsum.py diff --git a/vt316generator.py b/utils/vt316generator.py similarity index 100% rename from vt316generator.py rename to utils/vt316generator.py From 044791948ea34e8a8d015ec466b089dcfa6f44b0 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 9 Sep 2024 13:53:46 +0100 Subject: [PATCH 05/36] Add scripts for data prep before AoH --- prepare-layers/generate_crosswalk.py | 64 +++++++++++ prepare-layers/make_arable_map.py | 111 ++++++++++++++++++ prepare-layers/make_area_map.py | 90 +++++++++++++++ prepare-layers/make_current_map.py | 111 ++++++++++++++++++ prepare-layers/make_restore_map.py | 128 +++++++++++++++++++++ prepare-species/cleaning.py | 34 ++++++ prepare-species/extract_species_psql.py | 145 ++++++++++++++++++++++++ prepare-species/species_prep.py | 144 +++++++++++++++++++++++ 8 files changed, 827 insertions(+) create mode 100644 prepare-layers/generate_crosswalk.py create mode 100644 prepare-layers/make_arable_map.py create mode 100644 prepare-layers/make_area_map.py create mode 100644 prepare-layers/make_current_map.py create mode 100644 prepare-layers/make_restore_map.py create mode 100644 prepare-species/cleaning.py create mode 100644 prepare-species/extract_species_psql.py create mode 100644 prepare-species/species_prep.py diff --git a/prepare-layers/generate_crosswalk.py b/prepare-layers/generate_crosswalk.py new 
file mode 100644
index 0000000..3b7488f
--- /dev/null
+++ b/prepare-layers/generate_crosswalk.py
@@ -0,0 +1,64 @@
+import argparse
+import os
+
+import pandas as pd
+from iucn_modlib.translator import toJung
+
+
+# Taken from https://www.iucnredlist.org/resources/habitat-classification-scheme
+IUCN_HABITAT_CODES = [
+    "1", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9",
+    "2", "2.1", "2.2",
+    "3", "3.1", "3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8",
+    "4", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7",
+    "5", "5.1", "5.2", "5.3", "5.4", "5.5", "5.6", "5.7", "5.8", "5.9",
+    "5.10", "5.11", "5.12", "5.13", "5.14", "5.15", "5.16", "5.17", "5.18",
+    "6",
+    "7", "7.1", "7.2",
+    "8", "8.1", "8.2", "8.3",
+    "9", "9.1", "9.2", "9.3", "9.4", "9.5", "9.6", "9.7", "9.8", "9.9", "9.10",
+    "9.8.1", "9.8.2", "9.8.3", "9.8.4", "9.8.5", "9.8.6",
+    "10", "10.1", "10.2", "10.3", "10.4",
+    "11", "11.1", "11.1.1", "11.1.2", "11.2", "11.3", "11.4", "11.5", "11.6",
+    "12", "12.1", "12.2", "12.3", "12.4", "12.5", "12.6", "12.7",
+    "13", "13.1", "13.2", "13.3", "13.4", "13.5",
+    "14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6",
+    "15", "15.1", "15.2", "15.3", "15.4", "15.5", "15.6", "15.7", "15.8",
+    "15.9", "15.10", "15.11", "15.12", "15.13",
+    "16",
+    "17",
+    "18",
+]
+
+def generate_crosswalk(
+    output_filename: str,
+) -> None:
+    output_dir, _ = os.path.split(output_filename)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+
+    res = []
+    for iucn_code in IUCN_HABITAT_CODES:
+        try:
+            for jung_code in toJung([iucn_code]):
+                res.append([iucn_code, jung_code])
+        except ValueError:
+            continue
+
+    df = pd.DataFrame(res, columns=["code", "value"])
+    df.to_csv(output_filename, index=False)
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Generate a Jung crosswalk table as CSV.")
+    parser.add_argument(
+        '--output',
+        type=str,
+        help='Path where final crosswalk should be stored',
+        required=True,
+        dest='output_filename',
+    )
+    args = parser.parse_args()
+    generate_crosswalk(args.output_filename)
+
+if __name__ == "__main__":
+    main()
diff --git a/prepare-layers/make_arable_map.py b/prepare-layers/make_arable_map.py
new file mode 100644
index 0000000..0204694
--- /dev/null
+++ b/prepare-layers/make_arable_map.py
@@ -0,0 +1,111 @@
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+from typing import Dict, List, Optional
+
+import numpy as np
+import pandas as pd
+from alive_progress import alive_bar
+from yirgacheffe.layers import RasterLayer
+
+# From Eyres et al: In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land
+IUCN_CODE_ARTIFICAL = [
+    "14", "14.3", "14.4", "14.5", "14.6"
+]
+ARABLE = "14.1"
+
+def load_crosswalk_table(table_file_name: str) -> Dict[str,int]:
+    rawdata = pd.read_csv(table_file_name)
+    result = {}
+    for _, row in rawdata.iterrows():
+        try:
+            result[row.code].append(int(row.value))
+        except KeyError:
+            result[row.code] = [int(row.value)]
+    return result
+
+
+def make_arable_map(
+    current_path: str,
+    crosswalk_path: str,
+    output_path: str,
+    concurrency: Optional[int],
+    show_progress: bool,
+) -> None:
+    with RasterLayer.layer_from_file(current_path) as current:
+        crosswalk = load_crosswalk_table(crosswalk_path)
+
+        map_preserve_code = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_ARTIFICAL]))
+        # arable_code = crosswalk[ARABLE][0]
+        arable_code = 1401 # This is a hack, as Daniele's crosswalk has 14.1 mapped to both 1400 and 1401,
+                           # and there's no logical way to choose between them
+
+        calc = current.numpy_apply(
+            lambda a: np.where(np.isin(a, map_preserve_code), a, arable_code)
+        )
+
+        with RasterLayer.empty_raster_layer_like(
+            current,
+            filename=output_path,
+            threads=16
+        ) as result:
+            if show_progress:
+                with alive_bar(manual=True) as bar:
+                    calc.parallel_save(result, callback=bar, parallelism=concurrency)
+            else:
+                calc.parallel_save(result, parallelism=concurrency)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Generate the arable scenario map.")
+    parser.add_argument(
+        '--current',
+        type=str,
+        help='Path of Jung L2 map',
+        required=True,
+        dest='current_path',
+    )
+    parser.add_argument(
+        '--crosswalk',
+        type=str,
+        help='Path of map to IUCN crosswalk table',
+        required=True,
+        dest='crosswalk_path',
+    )
+    parser.add_argument(
+        '--output',
+        type=str,
+        help='Path where final map should be stored',
+        required=True,
+        dest='results_path',
+    )
+    parser.add_argument(
+        '-j',
+        type=int,
+        help='Number of concurrent threads to use for calculation.',
+        required=False,
+        default=None,
+        dest='concurrency',
+    )
+    parser.add_argument(
+        '-p',
+        help="Show progress indicator",
+        default=False,
+        required=False,
+        action='store_true',
+        dest='show_progress',
+    )
+    args = parser.parse_args()
+
+    make_arable_map(
+        args.current_path,
+        args.crosswalk_path,
+        args.results_path,
+        args.concurrency,
+        args.show_progress,
+    )
+
+if __name__ == "__main__":
+    main()
diff --git a/prepare-layers/make_area_map.py b/prepare-layers/make_area_map.py
new file mode 100644
index 0000000..b38d235
--- /dev/null
+++ b/prepare-layers/make_area_map.py
@@ -0,0 +1,90 @@
+import argparse
+import math
+
+import numpy as np
+from osgeo import gdal
+from yirgacheffe.window import Area, PixelScale
+from yirgacheffe.layers import RasterLayer
+
+# Taken from https://gis.stackexchange.com/questions/127165/more-accurate-way-to-calculate-area-of-rasters
+def area_of_pixel(pixel_size, center_lat):
+    """Calculate m^2 area of a wgs84 square pixel.
+
+    Adapted from: https://gis.stackexchange.com/a/127327/2397
+
+    Parameters:
+        pixel_size (float): length of side of pixel in degrees.
+        center_lat (float): latitude of the center of the pixel. Note this
+            value +/- half the `pixel-size` must not exceed 90/-90 degrees
+            latitude or an invalid area will be calculated.
+
+    Returns:
+        Area of square pixel of side length `pixel_size` centered at
+        `center_lat` in m^2.
+
+    """
+    a = 6378137 # meters
+    b = 6356752.3142 # meters
+    e = math.sqrt(1 - (b/a)**2)
+    area_list = []
+    for f in [center_lat+pixel_size/2, center_lat-pixel_size/2]:
+        zm = 1 - e*math.sin(math.radians(f))
+        zp = 1 + e*math.sin(math.radians(f))
+        area_list.append(
+            math.pi * b**2 * (
+                math.log(zp/zm) / (2*e) +
+                math.sin(math.radians(f)) / (zp*zm)))
+    return pixel_size / 360. * (area_list[0] - area_list[1])
* (area_list[0] - area_list[1]) + +def make_area_map( + pixel_scale: float, + output_path: str +) -> None: + pixels = [0,] * math.floor(90.0 / pixel_scale) + for i in range(len(pixels)): + y = (i + 0.5) * pixel_scale + area = area_of_pixel(pixel_scale, y) + pixels[i] = area + + allpixels = np.rot90(np.array([list(reversed(pixels)) + pixels])) + + area = Area( + left=math.floor(180 / pixel_scale) * pixel_scale * -1.0, + right=((math.floor(180 / pixel_scale) - 1) * pixel_scale * -1.0), + top=(math.floor(90 / pixel_scale) * pixel_scale), + bottom=(math.floor(90 / pixel_scale) * pixel_scale * -1.0) + ) + with RasterLayer.empty_raster_layer( + area, + PixelScale(pixel_scale, pixel_scale * -1.0), + gdal.GDT_Float32, + filename=output_path + ) as res: + res._dataset.WriteArray(allpixels, 0, 0) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Downsample habitat map to raster per terrain type.") + parser.add_argument( + "--scale", + type=float, + required=True, + dest="pixel_scale", + help="Output pixel scale value." + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output_path", + help="Destination file for area raster." + ) + args = parser.parse_args() + + make_area_map( + args.pixel_scale, + args.output_path + ) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/make_current_map.py b/prepare-layers/make_current_map.py new file mode 100644 index 0000000..c4e1f7c --- /dev/null +++ b/prepare-layers/make_current_map.py @@ -0,0 +1,111 @@ +import argparse +import itertools +from typing import Dict, Optional +from multiprocessing import Pool, cpu_count, set_start_method + +import numpy as np +import pandas as pd +from alive_progress import alive_bar +from yirgacheffe.layers import RasterLayer + +# From Eyres et al: The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped only at IUCN level 1, +# so to estimate species’ proportion of original AOH now remaining we could only use natural habitats mapped at level 1 and artificial +# habitats at level 2. 
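+#
+# As a concrete example of the rule implemented below (crosswalk values assumed for
+# illustration): a level 2 natural habitat code such as 505 would be floored to its
+# level 1 parent 500, while an artificial level 2 code such as 1401 (IUCN 14.1,
+# arable) is kept at level 2 because 14.1 is listed in IUCN_CODE_ARTIFICAL.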
+IUCN_CODE_ARTIFICAL = [ + "14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6" +] + +def load_crosswalk_table(table_file_name: str) -> Dict[str,int]: + rawdata = pd.read_csv(table_file_name) + result = {} + for _, row in rawdata.iterrows(): + try: + result[row.code].append(int(row.value)) + except KeyError: + result[row.code] = [int(row.value)] + return result + + +def make_current_map( + current_path: str, + crosswalk_path: str, + output_path: str, + concurrency: Optional[int], + show_progress: bool, +) -> None: + with RasterLayer.layer_from_file(current_path) as current: + crosswalk = load_crosswalk_table(crosswalk_path) + + map_preserve_code = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_ARTIFICAL])) + + def filter(a): + import numpy as np + return np.where(np.isin(a, map_preserve_code), a, (np.floor(a / 100) * 100).astype(int)) + + calc = current.numpy_apply(filter) + + with RasterLayer.empty_raster_layer_like( + current, + filename=output_path, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + +def main() -> None: + set_start_method("spawn") + + parser = argparse.ArgumentParser(description="Zenodo resource downloader.") + parser.add_argument( + '--jung_l2', + type=str, + help='Path of Jung L2 map', + required=True, + dest='current_path', + ) + parser.add_argument( + '--crosswalk', + type=str, + help='Path of map to IUCN crosswalk table', + required=True, + dest='crosswalk_path', + ) + parser.add_argument( + '--output', + type=str, + help='Path where final map should be stored', + required=True, + dest='results_path', + ) + parser.add_argument( + '-j', + type=int, + help='Number of concurrent threads to use for calculation.', + required=False, + default=None, + dest='concurrency', + ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) + args = parser.parse_args() + + make_current_map( + args.current_path, + args.crosswalk_path, + args.results_path, + args.concurrency, + args.show_progress, + ) + +if __name__ == "__main__": + main() diff --git a/prepare-layers/make_restore_map.py b/prepare-layers/make_restore_map.py new file mode 100644 index 0000000..dfe3cab --- /dev/null +++ b/prepare-layers/make_restore_map.py @@ -0,0 +1,128 @@ +import argparse +import itertools +import sys +from typing import Dict, Optional + +import numpy as np +import pandas as pd +from alive_progress import alive_bar +from yirgacheffe.layers import RasterLayer, RescaledRasterLayer + +# From Eyres et al: In the restoration scenario all areas classified as arable or pasture were restored to their PNV +IUCN_CODE_REPLACEMENTS = [ + "14.1", + "14.2" +] + +def load_crosswalk_table(table_file_name: str) -> Dict[str,int]: + rawdata = pd.read_csv(table_file_name) + result = {} + for _, row in rawdata.iterrows(): + try: + result[row.code].append(int(row.value)) + except KeyError: + result[row.code] = [int(row.value)] + return result + + +def make_restore_map( + pnv_path: str, + current_path: str, + crosswalk_path: str, + output_path: str, + concurrency: Optional[int], + show_progress: bool, +) -> None: + with RasterLayer.layer_from_file(current_path) as current: + with RescaledRasterLayer.layer_from_file(pnv_path, current.pixel_scale) as pnv: + crosswalk = load_crosswalk_table(crosswalk_path) + + map_replacement_codes = 
list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_REPLACEMENTS])) + + try: + intersection = RasterLayer.find_intersection([pnv, current]) + except ValueError: + print(f"Layers do not match in pixel scale or projection:\n", file=sys.stderr) + print(f"\t{pnv_path}: {pnv.pixel_scale}, {pnv.projection}") + print(f"\t{current_path}: {current.pixel_scale}, {current.projection}") + sys.exit(-1) + + for layer in [pnv, current]: + layer.set_window_for_intersection(intersection) + + calc = current.numpy_apply( + lambda a, b: np.where(np.isin(a, map_replacement_codes), b, a), + pnv + ) + + with RasterLayer.empty_raster_layer_like( + current, + filename=output_path, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Zenodo resource downloader.") + parser.add_argument( + '--pnv', + type=str, + help='Path of PNV map', + required=True, + dest='pnv_path', + ) + parser.add_argument( + '--currentl2', + type=str, + help='Path of current L2 map', + required=True, + dest='current_path', + ) + parser.add_argument( + '--crosswalk', + type=str, + help='Path of map to IUCN crosswalk table', + required=True, + dest='crosswalk_path', + ) + parser.add_argument( + '--output', + type=str, + help='Path where final map should be stored', + required=True, + dest='results_path', + ) + parser.add_argument( + '-j', + type=int, + help='Number of concurrent threads to use for calculation.', + required=False, + default=None, + dest='concurrency', + ) + parser.add_argument( + '-p', + help="Show progress indicator", + default=False, + required=False, + action='store_true', + dest='show_progress', + ) + args = parser.parse_args() + + make_restore_map( + args.pnv_path, + args.current_path, + args.crosswalk_path, + args.results_path, + args.concurrency, + args.show_progress, + ) + +if __name__ == "__main__": + main() diff --git a/prepare-species/cleaning.py b/prepare-species/cleaning.py new file mode 100644 index 0000000..22d7f40 --- /dev/null +++ b/prepare-species/cleaning.py @@ -0,0 +1,34 @@ +import math + +import pandas as pd + +ELEVATION_MIN = -500 +ELEVATION_MAX = 9000 + +def tidy_data(row: pd.Series) -> pd.Series: + """Tidy up the data as per Busana et al""" + + # Missing lower and/or upper elevation + if row.elevation_lower is None: + row.elevation_lower = ELEVATION_MIN + if row.elevation_upper is None: + row.elevation_upper = ELEVATION_MAX + + # Lower elevation < -500 and/or upper elevation > 9000 + row.elevation_lower = max(ELEVATION_MIN, row.elevation_lower) + row.elevation_upper = min(ELEVATION_MAX, row.elevation_upper) + + # Lower elevation higher than upper elevation + if row.elevation_lower > row.elevation_upper: + row.elevation_lower = ELEVATION_MIN + row.elevation_upper = ELEVATION_MAX + + # Small difference (<50m) between lower and upper elevation + elevation_diff = row.elevation_upper - row.elevation_lower + if elevation_diff < 50.0: + spare = 50.0 - elevation_diff + adjust = math.ceil(spare / 2.0) + row.elevation_lower -= adjust + row.elevation_upper += adjust + + return row diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py new file mode 100644 index 0000000..5c6e473 --- /dev/null +++ b/prepare-species/extract_species_psql.py @@ -0,0 +1,145 @@ +import argparse +import os +from typing import Optional + +# import pyshark 
# pylint: disable=W0611 +import geopandas as gpd +import pyproj +from sqlalchemy import create_engine, text + +from cleaning import tidy_data + +SEASON_NAME = { + 1: "RESIDENT", + 2: "BREEDING", + 3: "NONBREEDING", +} + +STATEMENT = """ +WITH habitat_seasons AS ( + SELECT + assessment_habitats.assessment_id, + assessment_habitats.habitat_id, + CASE + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Resident' THEN 1 + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Breeding%' THEN 2 + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Non%Breed%' THEN 3 + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Pass%' THEN 4 + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE '%un%n%' THEN 1 -- capture 'uncertain' and 'unknown' as resident + ELSE 1 + END AS seasonal + FROM + public.assessments + LEFT JOIN taxons ON taxons.id = assessments.taxon_id + LEFT JOIN assessment_habitats ON assessment_habitats.assessment_id = assessments.id + WHERE + assessments.latest = true +), +unique_seasons AS ( + SELECT DISTINCT ON (taxons.scientific_name, habitat_seasons.seasonal) + assessments.sis_taxon_id as id_no, + assessment_ranges.seasonal, + assessment_ranges.presence, + assessment_ranges.origin, + STRING_AGG(habitat_lookup.code, '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS full_habitat_code, + STRING_AGG(system_lookup.description->>'en', '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS systems, + (ST_COLLECT(assessment_ranges.geom::geometry) OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id))::geography AS geometry, + (assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower, + (assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper, + ROW_NUMBER() OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessments.id, assessment_ranges.id) AS rn + FROM + assessments + LEFT JOIN taxons ON taxons.id = assessments.taxon_id + LEFT JOIN assessment_ranges ON assessment_ranges.assessment_id = assessments.id + LEFT JOIN habitat_seasons ON habitat_seasons.assessment_id = assessments.id AND habitat_seasons.seasonal = assessment_ranges.seasonal + LEFT JOIN assessment_systems ON assessment_systems.assessment_id = assessments.id + LEFT JOIN system_lookup ON assessment_systems.system_lookup_id = system_lookup.id + LEFT JOIN habitat_lookup ON habitat_lookup.id = habitat_seasons.habitat_id + LEFT JOIN assessment_supplementary_infos ON assessment_supplementary_infos.assessment_id = assessments.id + LEFT JOIN red_list_category_lookup ON red_list_category_lookup.id = assessments.red_list_category_id + WHERE + assessments.latest = true + AND taxons.class_id = 22672813 -- AVES + AND habitat_seasons.habitat_id is not null + AND assessment_ranges.presence IN {presence} + AND assessment_ranges.origin IN (1, 2, 6) + AND assessment_ranges.seasonal IN (1, 2, 3) + AND red_list_category_lookup.code != 'EX' + ) +SELECT + id_no, + seasonal, + elevation_lower, + elevation_upper, + full_habitat_code, + geometry +FROM + unique_seasons +WHERE + rn = 1 + -- the below queries must happen on the aggregate data + AND full_habitat_code NOT LIKE '7%' + AND full_habitat_code NOT LIKE '%|7%' + AND systems NOT LIKE '%Marine%' +LIMIT 50 +""" + +CURRENT_STATEMENT = 
STATEMENT.format(presence="(1, 2)")
+HISTORIC_STATEMENT = STATEMENT.format(presence="(1, 2, 4, 5)")
+
+DB_HOST = os.getenv("DB_HOST")
+DB_PORT = os.getenv("DB_PORT", "5432")
+DB_NAME = os.getenv("DB_NAME")
+DB_USER = os.getenv("DB_USER")
+DB_PASSWORD = os.getenv("DB_PASSWORD")
+DB_CONFIG = (
+    f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
+)
+
+def extract_data_per_species(
+    output_directory_path: str,
+    target_projection: Optional[str],
+) -> None:
+
+    # The geometry is in CRS 4326, but the AoH work is done in World_Behrmann, aka Projected CRS: ESRI:54017
+    src_crs = pyproj.CRS.from_epsg(4326)
+    target_crs = src_crs  # reprojection currently disabled; was: pyproj.CRS.from_string(target_projection)
+
+    engine = create_engine(DB_CONFIG, echo=False)
+    for era, statement in ("current", CURRENT_STATEMENT), ("historic", HISTORIC_STATEMENT):
+        os.makedirs(os.path.join(output_directory_path, era), exist_ok=True)
+        dfi = gpd.read_postgis(text(statement), con=engine, geom_col="geometry", chunksize=1024)
+        for df in dfi:
+            for _, raw in df.iterrows():
+                row = tidy_data(raw)
+                output_path = os.path.join(output_directory_path, era, f"{row.id_no}_{SEASON_NAME[row.seasonal]}.geojson")
+                res = gpd.GeoDataFrame(row.to_frame().transpose(), crs=src_crs, geometry="geometry")
+                res_projected = res.to_crs(target_crs)
+                res_projected.to_file(output_path, driver="GeoJSON")
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Process aggregate species data into per-species files.")
+    parser.add_argument(
+        '--output',
+        type=str,
+        help='Directory where per species Geojson is stored',
+        required=True,
+        dest='output_directory_path',
+    )
+    parser.add_argument(
+        '--projection',
+        type=str,
+        help="Target projection",
+        required=False,
+        dest="target_projection",
+        default="ESRI:54017"
+    )
+    args = parser.parse_args()
+
+    extract_data_per_species(
+        args.output_directory_path,
+        args.target_projection
+    )
+
+if __name__ == "__main__":
+    main()
diff --git a/prepare-species/species_prep.py b/prepare-species/species_prep.py
new file mode 100644
index 0000000..b4b23b8
--- /dev/null
+++ b/prepare-species/species_prep.py
@@ -0,0 +1,144 @@
+import argparse
+import os
+from enum import Enum
+from typing import List, Optional, Any, Set, Tuple
+
+import geopandas as gpd
+import pandas as pd
+from shapely.ops import transform
+from pyproj import Transformer, CRS
+# import pyshark # pylint: disable=W0611
+
+import seasonality
+from iucn_modlib.classes.Taxon import Taxon
+from iucn_modlib.factories import TaxonFactories
+
+from cleaning import tidy_data
+
+class Seasonality(Enum):
+    RESIDENT = "resident"
+    BREEDING = "breeding"
+    NONBREEDING = "nonbreeding"
+
+    @property
+    def iucn_seasons(self) -> Tuple:
+        if self.value == 'resident':
+            return ('Resident', 'Seasonal Occurrence Unknown')
+        elif self.value == 'breeding':
+            return ('Resident', 'Breeding Season', 'Seasonal Occurrence Unknown')
+        elif self.value == 'nonbreeding':
+            return ('Resident', 'Non-Breeding Season', 'Seasonal Occurrence Unknown')
+        else:
+            raise NotImplementedError(f'Unhandled seasonality value {self.value}')
+
+
+def seasonality_for_species(species: Taxon, range_file: str) -> Set[str]:
+    og_seasons = set(
+        seasonality.habitatSeasonality(species) +
+        seasonality.rangeSeasonality(range_file, species.taxonid)
+    )
+    if len(og_seasons) == 0:
+        return set()
+    seasons = {'resident'}
+    if len(og_seasons.difference({'resident'})) > 0:
+        seasons = {'breeding', 'nonbreeding'}
+    return seasons
+
+
+def extract_data_per_species(
+    specieslist_path: str,
+    speciesdata_path: str,
+ iucn_data_batch: str, + target_projection: Optional[str], + output_directory_path: str, +) -> None: + os.makedirs(output_directory_path, exist_ok=True) + + species_list = pd.read_csv(specieslist_path, index_col=0) + batch = TaxonFactories.loadBatchSource(iucn_data_batch) + species_data = gpd.read_file(speciesdata_path) + + for species_id in species_list["taxid"]: + try: + species = TaxonFactories.TaxonFactoryRedListBatch(species_id, batch) + except IndexError: + # Some of the data in the batch needs tidy... + print(f'{species_id} not in batch') + continue + + seasonality_list = seasonality_for_species(species, speciesdata_path) + for seasonality in seasonality_list: + filename = f'{seasonality}-{species.taxonid}.geojson' + + + + + subset_of_interest = species_data[[ + "id_no", + "seasonal", + "elevation_lower", + "elevation_upper", + "full_habitat_code", + "geometry" + ]] + + + for _, raw in subset_of_interest.iterrows(): + row = tidy_data(raw) + if target_projection: + transformer = Transformer.from_crs(species_data.crs, CRS(target_projection)) + new_geom = transform(transformer.transform, row.geometry) + row.geometry = new_geom + output_path = os.path.join(output_directory_path, f"{row.id_no}_{row.seasonal}.geojson") + res = gpd.GeoDataFrame(row.to_frame().transpose(), crs=CRS(target_projection), geometry="geometry") + res.to_file(output_path, driver="GeoJSON") + +def main() -> None: + parser = argparse.ArgumentParser(description="Process agregate species data to per-species-per-season for LIFE.") + parser.add_argument( + '--species', + type=str, + help='Selected list of species for evaluation', + required=True, + dest="species_list", + ) + parser.add_argument( + '--rangedata', + type=str, + help="Processed species range data", + required=True, + dest="speciesdata_path", + ) + parser.add_argument( + '--iucnbatch', + type=str, + help="IUCN download batch", + required=True, + dest="iucn_data_batch", + ) + parser.add_argument( + '--projection', + type=str, + help="Target projection", + required=False, + dest="target_projection" + ) + parser.add_argument( + '--output', + type=str, + help='Directory where per species Geojson is stored', + required=True, + dest='output_directory_path', + ) + args = parser.parse_args() + + extract_data_per_species( + args.species_list, + args.speciesdata_path, + args.iucn_data_batch, + args.target_projection, + args.output_directory_path + ) + +if __name__ == "__main__": + main() From 3aec8ec3ac4acf11e5d014b7ed623c0a09a7cd9b Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 11 Sep 2024 16:43:27 +0100 Subject: [PATCH 06/36] Add more manual stages --- deltap/delta_p_scaled_area.py | 59 +++++ deltap/global_code_residents_pixel_AE_128.py | 222 +++++++++++++++++++ method.md | 86 ++++++- prepare-layers/make_diff_map.py | 88 ++++++++ 4 files changed, 451 insertions(+), 4 deletions(-) create mode 100644 deltap/delta_p_scaled_area.py create mode 100644 deltap/global_code_residents_pixel_AE_128.py create mode 100644 prepare-layers/make_diff_map.py diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py new file mode 100644 index 0000000..04b7e3b --- /dev/null +++ b/deltap/delta_p_scaled_area.py @@ -0,0 +1,59 @@ +import argparse + +import numpy as np +from yirgacheffe.layers import RasterLayer + +SCALE = 1e6 + +def delta_p_scaled_area( + input_path: str, + diff_area_map_path: str, + output_path: str, +): + with RasterLayer.layer_from_file(diff_area_map_path) as area_restore: + with RasterLayer.layer_from_file(input_path) as inlayer: + + 
intersection = RasterLayer.find_intersection([area_restore, inlayer])  # crop both rasters to their common extent
+            inlayer.set_window_for_intersection(intersection)
+            area_restore.set_window_for_intersection(intersection)
+
+            with RasterLayer.empty_raster_layer_like(inlayer, filename=output_path, nodata=float('nan')) as result:
+
+                area_restore_filter = area_restore.numpy_apply(lambda c: np.where(c < SCALE, 0, c)) / SCALE  # drop areas below SCALE, rescale m^2 to km^2
+                filtered_layer = inlayer.numpy_apply(lambda il, af: np.where(af != 0, il, 0), area_restore_filter)
+                scaled_filtered_layer = filtered_layer / area_restore_filter
+                scaled_filtered_layer.save(result)
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Scale final results.")
+    parser.add_argument(
+        '--input',
+        type=str,
+        help='Path of map of extinction risk',
+        required=True,
+        dest='input_path',
+    )
+    parser.add_argument(
+        '--diffmap',
+        type=str,
+        help='Path of map of scenario difference scaled by area',
+        required=True,
+        dest='diff_area_map_path',
+    )
+    parser.add_argument(
+        '--output',
+        type=str,
+        help='Path where final map should be stored',
+        required=True,
+        dest='output_path',
+    )
+    args = parser.parse_args()
+
+    delta_p_scaled_area(
+        args.input_path,
+        args.diff_area_map_path,
+        args.output_path
+    )
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/deltap/global_code_residents_pixel_AE_128.py b/deltap/global_code_residents_pixel_AE_128.py
new file mode 100644
index 0000000..573017d
--- /dev/null
+++ b/deltap/global_code_residents_pixel_AE_128.py
@@ -0,0 +1,222 @@
+import argparse
+import math
+import os
+import types
+
+import geopandas as gpd
+import numpy as np
+from osgeo import gdal
+from yirgacheffe.layers import RasterLayer, ConstantLayer
+
+GOMPERTZ_A = 2.5  # parameters of the persistence curve p(x) = exp(-exp(A + B * x**ALPHA))
+GOMPERTZ_B = -14.5
+GOMPERTZ_ALPHA = 1
+
+seasons = types.SimpleNamespace()
+seasons.RESIDENT = 1
+seasons.BREEDING = 2
+seasons.NONBREEDING = 3
+
+def gen_gompertz(x: float) -> float:
+    return math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA))))
+
+def numpy_gompertz(x: np.ndarray) -> np.ndarray:
+    return np.exp(-np.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA))))
+
+def open_layer_as_float64(filename: str) -> RasterLayer:
+    if filename == "nan":
+        return ConstantLayer(0.0)
+    layer = RasterLayer.layer_from_file(filename)
+    if layer.datatype == gdal.GDT_Float64:
+        return layer
+    layer64 = RasterLayer.empty_raster_layer_like(layer, datatype=gdal.GDT_Float64)
+    layer.save(layer64)
+    return layer64
+
+def calc_persistence_value(current_AOH: float, historic_AOH: float, exponent_func) -> float:
+    sp_P = exponent_func(current_AOH / historic_AOH)
+    sp_P_fix = np.where(sp_P > 1, 1, sp_P)  # cap persistence at 1
+    return sp_P_fix
+
+def process_delta_p(current: RasterLayer, scenario: RasterLayer, current_AOH: float, historic_AOH: float, exponent_func_raster) -> RasterLayer:
+    # In theory we could recalc current_AOH, but given we already have it don't duplicate work
+    # This is the raster equivalent of the original per-CSV calculation
+    const_layer = ConstantLayer(current_AOH)  # a layer with the same footprint as current, filled with the scalar current AoH
+    calc_1 = (const_layer - current) + scenario  # per-pixel new AoH: the total with this pixel switched from current to scenario
+    new_AOH = RasterLayer.empty_raster_layer_like(current)
+    calc_1.save(new_AOH)
+
+    calc_2 = (new_AOH / historic_AOH).numpy_apply(exponent_func_raster)
+    calc_2 = calc_2.numpy_apply(lambda chunk: np.where(chunk > 1, 1, chunk))
+    new_p = RasterLayer.empty_raster_layer_like(new_AOH)
+    calc_2.save(new_p)
+
+    return new_p
+
+def global_code_residents_pixel_ae(
species_data_path: str, + current_aohs_path: str, + scenario_aohs_path: str, + historic_aohs_path: str, + exponent: str, + output_folder: str, +) -> None: + os.makedirs(output_folder, exist_ok=True) + + os.environ["OGR_GEOJSON_MAX_OBJ_SIZE"] = "0" + try: + filtered_species_info = gpd.read_file(species_data_path) + except: # pylint:disable=W0702 + quit(f"Failed to read {species_data_path}") + taxid = filtered_species_info.id_no.values[0] + season = filtered_species_info.seasonal.values[0] + + try: + exp_val = float(exponent) + z_exponent_func_float = lambda x: np.float_power(x, exp_val) + z_exponent_func_raster = lambda x: np.float_power(x, exp_val) + except ValueError: + if exponent == "gompertz": + z_exponent_func_float = gen_gompertz + z_exponent_func_raster = numpy_gompertz + else: + quit(f"unrecognised exponent {exponent}") + + match season: + case seasons.RESIDENT: + filename = f"{taxid}_{season}.tif" + try: + current = open_layer_as_float64(os.path.join(current_aohs_path, filename)) + scenario = open_layer_as_float64(os.path.join(scenario_aohs_path, filename)) + historic_AOH = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum() + except FileNotFoundError as fnf: + quit(f"Failed to open {fnf.filename}") + + if historic_AOH == 0.0: + quit(f"Historic AoH for {taxid} is zero, aborting") + + layers = [current, scenario] + union = RasterLayer.find_union(layers) + for layer in layers: + try: + layer.set_window_for_union(union) + except ValueError: + pass + + current_AOH = current.sum() + + new_p_layer = process_delta_p(current, scenario, current_AOH, historic_AOH, z_exponent_func_raster) + + old_persistence = calc_persistence_value(current_AOH, historic_AOH, z_exponent_func_float) + calc = new_p_layer - ConstantLayer(old_persistence) + delta_p = RasterLayer.empty_raster_layer_like(new_p_layer, filename=os.path.join(output_folder, filename)) + calc.save(delta_p) + + case seasons.NONBREEDING: + nonbreeding_filename = f"{taxid}_{seasons.NONBREEDING}.tif" + breeding_filename = f"{taxid}_{seasons.BREEDING}.tif" + + historic_AOH_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)).sum() + if historic_AOH_breeding == 0.0: + quit(f"Historic AoH breeding for {taxid} is zero, aborting") + historic_AOH_non_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)).sum() + if historic_AOH_non_breeding == 0.0: + quit(f"Historic AoH for non breeding {taxid} is zero, aborting") + + + if scenario_aohs_path != "nan": + non_breeding_scenario_path = os.path.join(scenario_aohs_path, nonbreeding_filename) + breeding_scenario_path = os.path.join(scenario_aohs_path, breeding_filename) + else: + non_breeding_scenario_path = "nan" + breeding_scenario_path = "nan" + + try: + current_breeding = open_layer_as_float64(os.path.join(current_aohs_path, breeding_filename)) + current_non_breeding = open_layer_as_float64(os.path.join(current_aohs_path, nonbreeding_filename)) + scenario_breeding = open_layer_as_float64(breeding_scenario_path) + scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) + except FileNotFoundError as fnf: + quit(f"Failed to open {fnf.filename}") + + layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding] + union = RasterLayer.find_union(layers) + for layer in layers: + try: + layer.set_window_for_union(union) + except ValueError: + pass + + current_AOH_breeding = current_breeding.sum() + persistence_breeding = 
calc_persistence_value(current_AOH_breeding, historic_AOH_breeding, z_exponent_func_float) + + current_AOH_non_breeding = current_non_breeding.sum() + persistence_non_breeding = calc_persistence_value(current_AOH_non_breeding, historic_AOH_non_breeding, z_exponent_func_float) + + old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5) + + new_p_breeding = process_delta_p(current_breeding, scenario_breeding, current_AOH_breeding, historic_AOH_breeding, z_exponent_func_raster) + new_p_non_breeding = process_delta_p(current_non_breeding, scenario_non_breeding, current_AOH_non_breeding, historic_AOH_non_breeding, z_exponent_func_raster) + + new_p_layer = (new_p_breeding ** 0.5) * (new_p_non_breeding ** 0.5) + + delta_p_layer = new_p_layer - ConstantLayer(old_persistence) + + output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=args['output_path']) + delta_p_layer.save(output) + + case seasons.BREEDING: + pass # covered by the nonbreeding case + case _: + quit(f"Expected season for species {taxid}: {season}") + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument( + '--speciesdata', + type=str, + help="Single species/seasonality geojson", + required=True, + dest="species_data_path" + ) + parser.add_argument( + '--current_path', + type=str, + required=True, + dest="current_path", + help="path to species current AOH hex" + ) + parser.add_argument( + '--scenario_path', + type=str, + required=True, + dest="scenario_path", + help="path to species scenario AOH hex" + ) + parser.add_argument( + '--historic_path', + type=str, + required=False, + dest="historic_path", + help="path to species historic AOH hex" + ) + parser.add_argument('--output_path', + type=str, + required=True, + dest="output_path", + help="path to save output csv" + ) + parser.add_argument('--z', dest='exponent', type=str, default='0.25') + args = parser.parse_args() + + global_code_residents_pixel_ae( + args.species_data_path, + args.current_path, + args.scenario_path, + args.historic_path. + args.exponent, + args.output_path, + ) + +if __name__ == "__main__": + main() diff --git a/method.md b/method.md index 7a47b81..c2ff283 100644 --- a/method.md +++ b/method.md @@ -5,7 +5,6 @@ path: /root ## Build the environment - ### The geospatial compute container The dockerfile that comes with the repo should be used to run the pipeline. @@ -62,9 +61,8 @@ For querying the IUCN data held in the PostGIS database we use a seperate contai (run (network host) (shell "apt-get update -qqy && apt-get -y install libpq-dev gcc git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*")) (run (network host) (shell "pip install psycopg2 SQLalchemy geopandas")) (run (network host) (shell "pip install git+https://github.com/quantifyearth/pyshark")) - (copy (src "./") (dst "/root/")) + (copy (src "./prepare-species") (dst "/root/")) (workdir "/root/") - (run (shell "chmod 755 *.py")) ) ``` @@ -165,6 +163,38 @@ python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/arable.tif \ --output /data/habitat_maps/arable/ ``` +### Generate area map + +For LIFE we need to know the actual area, not just pixel count. For this we generate a map that contains the area per pixel for a given latitude which is one pixel wide, and then we sweep that across for a given longitude. 
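+
+For a sense of the numbers involved, here is a sketch (not part of the pipeline) using the `area_of_pixel` helper from `prepare-layers/make_area_map.py`, assuming it is run from that directory:
+
+```python
+from make_area_map import area_of_pixel
+
+# At the 0.016666... degree pixel scale used here (roughly 1.85km at the
+# equator), an equatorial pixel covers about 3.4 million m^2, while a pixel
+# at 60 degrees latitude covers roughly half that.
+print(area_of_pixel(0.016666666666667, 0.0))   # ~3.4e6 m^2
+print(area_of_pixel(0.016666666666667, 60.0))  # ~1.7e6 m^2
+```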
+ +```shark-run:aohbuilder +python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output /data/area-per-pixel.tif +``` + +### Differences maps + +In the algorithm we use need to account for map projection distortions, so all values in the AoHs are based on the area per pixel. To get the final extinction risk values we must remove that scaling. To do that we generate a map of area difference from current for the given scenario. + +```shark-run:aohbuilder +python3 ./prepare-layers/make_diff_map.py --current /data/habitat/current_raw.tif \ + --scenario /data/habitat/restore.tif \ + --output /data/habitat/restore_diff_raw.tif + +gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 /data/habitat/restore_diff_raw.tif /data/habitat/restore_diff.tif + +gdal_calc -A /data/habitat/restore_diff.tif -B /data/area-per-pixel.tif --out /data/habitat/restore_diff_area.tif --calc="A*B" +``` + +```shark-run:aohbuilder +python3 ./prepare-layers/make_diff_map.py --current /data/habitat/current_raw.tif \ + --scenario /data/habitat/arable.tif \ + --output /data/habitat/arable_diff_raw.tif + +gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 /data/habitat/arable_diff_raw.tif /data/habitat/arable_diff.tif + +gdal_calc -A /data/habitat/arable_diff.tif -B /data/area-per-pixel.tif --out /data/habitat/arable_diff_area.tif --calc="A*B" +``` + ### Fetching the elevation map @@ -210,6 +240,7 @@ This step generates a single AoH raster for a single one of the above GeoJSON fi python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/current/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ + --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ --speciesdata /data/species-info/current/* \ --output /data/aohs/current/ @@ -217,6 +248,7 @@ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/current/ \ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/restore/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ + --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ --speciesdata /data/species-info/current/* \ --output /data/aohs/restore/ @@ -224,6 +256,7 @@ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/restore/ \ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/arable/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ + --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ --speciesdata /data/species-info/current/* \ --output /data/aohs/arable/ @@ -231,6 +264,7 @@ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/arable/ \ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/pnv/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ + --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ --speciesdata /data/species-info/historic/* \ --output /data/aohs/pnv/ @@ -238,7 +272,7 @@ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/pnv/ \ The results you then want will all be in: -```shark-publish +```shark-publish2 /data/aohs/current/ /data/aohs/restore/ /data/aohs/arable/ @@ -248,3 +282,47 @@ The results you then want will all be in: ## Calculating persistence maps +For each species we use the AoH data to calculate the likelihood of extinction under two scenarios: 
restoration and conservation. To do that we work out the delta_p value per species, and then sum together all those results per species into a single layer.
+
+
+```shark-run:aohbuilder
+python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \
+                                                       --current-path /data/aohs/current/ \
+                                                       --scenario-path /data/aohs/restore/ \
+                                                       --historic-path /data/aohs/pnv/ \
+                                                       --z 0.25 \
+                                                       --output /data/deltap/restore/
+
+python3 ./utils/raster_sum.py --input /data/deltap/restore/ --output /data/deltap/restore_0.25.tif
+
+python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \
+                                                       --current-path /data/aohs/current/ \
+                                                       --scenario-path /data/aohs/arable/ \
+                                                       --historic-path /data/aohs/pnv/ \
+                                                       --z 0.25 \
+                                                       --output /data/deltap/arable/
+
+python3 ./utils/raster_sum.py --input /data/deltap/arable/ --output /data/deltap/arable_0.25.tif
+```
+
+```shark-publish2
+/data/deltap/restore/
+/data/deltap/arable/
+```
+
+Finally, we need to scale the results for publication:
+
+```shark-run:aohbuilder
+python3 ./deltap/delta_p_hectare.py --input /data/deltap/restore_0.25.tif \
+                                    --diffmap /data/habitat/restore_diff_area.tif \
+                                    --output /data/deltap/scaled_restore_0.25.tif
+
+python3 ./deltap/delta_p_hectare.py --input /data/deltap/arable_0.25.tif \
+                                    --diffmap /data/habitat/arable_diff_area.tif \
+                                    --output /data/deltap/scaled_arable_0.25.tif
+```
+
+```shark-publish
+/data/deltap/scaled_restore_0.25.tif
+/data/deltap/scaled_arable_0.25.tif
+```
\ No newline at end of file
diff --git a/prepare-layers/make_diff_map.py b/prepare-layers/make_diff_map.py
new file mode 100644
index 0000000..9e2fbf4
--- /dev/null
+++ b/prepare-layers/make_diff_map.py
@@ -0,0 +1,88 @@
+import argparse
+from typing import Optional
+
+from osgeo import gdal
+from alive_progress import alive_bar
+from yirgacheffe.layers import RasterLayer
+
+def make_arable_map(
+    current_path: str,
+    scenario_path: str,
+    output_path: str,
+    concurrency: Optional[int],
+    show_progress: bool,
+) -> None:
+    with RasterLayer.layer_from_file(current_path) as current:
+        with RasterLayer.layer_from_file(scenario_path) as scenario:
+
+            layers = [current, scenario]
+            intersection = RasterLayer.find_intersection(layers)
+            for layer in layers:
+                layer.set_window_for_intersection(intersection)
+
+            calc = current.numpy_apply(lambda a, b: a != b, scenario)  # 1 where the maps differ, 0 where they agree
+
+            with RasterLayer.empty_raster_layer_like(
+                current,
+                filename=output_path,
+                datatype=gdal.GDT_Float32,
+                threads=16
+            ) as result:
+                if show_progress:
+                    with alive_bar(manual=True) as bar:
+                        calc.parallel_save(result, callback=bar, parallelism=concurrency)
+                else:
+                    calc.parallel_save(result, parallelism=concurrency)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Generate a map of where a scenario differs from the current habitat map.")
+    parser.add_argument(
+        '--current',
+        type=str,
+        help='Path of current habitat map',
+        required=True,
+        dest='current_path',
+    )
+    parser.add_argument(
+        '--scenario',
+        type=str,
+        help='Path of the scenario map',
+        required=True,
+        dest='scenario_path',
+    )
+    parser.add_argument(
+        '--output',
+        type=str,
+        help='Path where final map should be stored',
+        required=True,
+        dest='results_path',
+    )
+    parser.add_argument(
+        '-j',
+        type=int,
+        help='Number of concurrent threads to use for calculation.',
+        required=False,
+        default=None,
+        dest='concurrency',
+    )
+    parser.add_argument(
+        '-p',
+        help="Show progress indicator",
+        default=False,
+        required=False,
+        action='store_true',
+        dest='show_progress',
+    )
args = parser.parse_args() + + make_arable_map( + args.current_path, + args.scenario_path, + args.results_path, + args.concurrency, + args.show_progress, + ) + +if __name__ == "__main__": + main() From 3661da032f2a95f84af9d09a5ee51749fb006cbf Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 16 Sep 2024 11:31:53 +0200 Subject: [PATCH 07/36] README updates --- method.md | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/method.md b/method.md index c2ff283..088e1f9 100644 --- a/method.md +++ b/method.md @@ -1,8 +1,11 @@ --- path: /root --- + # How to run the pipeline for LIFE +From [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33). + ## Build the environment ### The geospatial compute container @@ -70,12 +73,12 @@ For querying the IUCN data held in the PostGIS database we use a seperate contai To calculate the AoH we need various basemaps: -* Habitat maps for four scenarios: - * Current day, in both L1 and L2 IUCN habitat classification - * Potential Natural Vegetation (PNV) showing the habitats predicted without human intevention - * Restore scenario - a map derived from the PNV and current maps showing certain lands restored to their pre-human - * Conserve scenario - a map derived form current indicating the impact of placement of arable lands -* The Digital Elevation Map (DEM) which has the height per pixel in meters +- Habitat maps for four scenarios: + - Current day, in both L1 and L2 IUCN habitat classification + - Potential Natural Vegetation (PNV) showing the habitats predicted without human intevention + - Restore scenario - a map derived from the PNV and current maps showing certain lands restored to their pre-human + - Conserve scenario - a map derived form current indicating the impact of placement of arable lands +- The Digital Elevation Map (DEM) which has the height per pixel in meters All these maps must be at the same pixel spacing and projection, and the output AoH maps will be at that same pixel resolution and projection. @@ -129,15 +132,16 @@ python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/current_raw. --output /data/habitat_maps/current/ ``` - ### Generating additional habitat maps -From [Eyres et al](): +LIFE calculates the impact on extinction rates under two future scenarios: restoration of habitats to their pre-human state, and the converstion of non-urban terrestrial habitat to arable. -For the restoration map: +The definition of the restore layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33 is: | In the restoration scenario all areas classified as arable or pasture were restored to their PNV. +We generate the restore habitat layers thus: + ```shark-run:aohbuilder python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \ --current /data/habitat/current_raw.tif \ @@ -149,7 +153,7 @@ python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \ --output /data/habitat_maps/restore/ ``` -For the conservation map: +The definition of the arable layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33 is: | In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land. 
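
In raster terms the conversion is a single masked substitution, as in this illustrative sketch of the rule `make_arable_map.py` implements (the crosswalk values here are assumed for the example, not taken from the real table):

```python
import numpy as np

preserve = [1400, 1403, 1404, 1405, 1406]  # artificial habitat codes left untouched (illustrative)
arable_code = 1401

pixels = np.array([500, 1405, 200, 1401])
print(np.where(np.isin(pixels, preserve), pixels, arable_code))
# -> [1401 1405 1401 1401]
```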
@@ -195,7 +199,6 @@ gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co CO gdal_calc -A /data/habitat/arable_diff.tif -B /data/area-per-pixel.tif --out /data/habitat/arable_diff_area.tif --calc="A*B" ``` - ### Fetching the elevation map To assist with provenance, we download the data from the Zenodo ID. @@ -211,7 +214,6 @@ gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co CO gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 /data/elevation.tif /data/elevation-max-1k.tif ``` - ## Calculating AoH Once all the data has been collected, we can now calclate the AoH maps. @@ -231,7 +233,6 @@ python3 ./prepare-species/extract_species_psql.py --output /data/species-info/ - The reason for doing this primarly one of pipeline optimisation, though it also makes the tasks of debugging and provenance tracing much easier. Most build systems, including the one we use, let you notice when files have updated and only do the work required based on that update. If we have many thousands of species on the redlise and only a few update, if we base our calculation on a single file with all species in, we'll have to calculate all thousands of results. But with this step added in, we will re-generate the per species per season GeoJSON files, which is cheap, but then we can spot that most of them haven't changed and we don't need to then calculate the rasters for those ones in the next stage. - ### Calculate AoH This step generates a single AoH raster for a single one of the above GeoJSON files. @@ -279,12 +280,10 @@ The results you then want will all be in: /data/aohs/pnv/ ``` - ## Calculating persistence maps For each species we use the AoH data to calculate the likelihood of extinction under two scenarios: restoration and conseravation. To do that we work out the delta_p value per species, and then sum together all those results per species into a single layer. - ```shark-run:aohbuilder python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \ --current-path /data/aohs/current/ \ @@ -325,4 +324,4 @@ python3 ./deltap/delta_p_hectare.py --input /data/deltap/arable_0.25.tif \ ```shark-publish /data/deltap/scaled_restore_0.25.tif /data/deltap/scaled_arable_0.25.tif -``` \ No newline at end of file +``` From 655af9e84098070f0b53b721b37db60a4e2bff9f Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 16 Sep 2024 11:32:55 +0200 Subject: [PATCH 08/36] README link fix --- method.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/method.md b/method.md index 088e1f9..580835c 100644 --- a/method.md +++ b/method.md @@ -136,7 +136,7 @@ python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/current_raw. LIFE calculates the impact on extinction rates under two future scenarios: restoration of habitats to their pre-human state, and the converstion of non-urban terrestrial habitat to arable. -The definition of the restore layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33 is: +The definition of the restore layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is: | In the restoration scenario all areas classified as arable or pasture were restored to their PNV. 
@@ -153,7 +153,7 @@ python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \ --output /data/habitat_maps/restore/ ``` -The definition of the arable layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33 is: +The definition of the arable layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is: | In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land. From 197b3c55325913d0ba232fe8165bd64390410b10 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 16 Sep 2024 11:33:53 +0200 Subject: [PATCH 09/36] README updates --- method.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/method.md b/method.md index 580835c..59beca1 100644 --- a/method.md +++ b/method.md @@ -140,8 +140,6 @@ The definition of the restore layer from [Eyres et al](https://www.cambridge.org | In the restoration scenario all areas classified as arable or pasture were restored to their PNV. -We generate the restore habitat layers thus: - ```shark-run:aohbuilder python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \ --current /data/habitat/current_raw.tif \ From b82695eca55b68ca8773a2cb24c7d920f027c46e Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 16 Sep 2024 11:36:29 +0200 Subject: [PATCH 10/36] README updates --- method.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/method.md b/method.md index 59beca1..b8f8584 100644 --- a/method.md +++ b/method.md @@ -136,7 +136,7 @@ python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/current_raw. LIFE calculates the impact on extinction rates under two future scenarios: restoration of habitats to their pre-human state, and the converstion of non-urban terrestrial habitat to arable. -The definition of the restore layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is: +The definition of the restore layer from Section 5 of [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is: | In the restoration scenario all areas classified as arable or pasture were restored to their PNV. @@ -151,7 +151,7 @@ python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \ --output /data/habitat_maps/restore/ ``` -The definition of the arable layer from [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is: +The definition of the arable layer from Section 5 of [Eyres et al](https://www.cambridge.org/engage/coe/article-details/65801ab4e9ebbb4db92dad33) is: | In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land. From be3bb717a312598094b12e94c4517c1b4cc8744e Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 23 Sep 2024 12:38:38 +0100 Subject: [PATCH 11/36] Fixes and tweaks. 
--- aoh-calculator | 2 +- deltap/global_code_residents_pixel_AE_128.py | 68 ++++++++--- method.md | 94 +++++++++------ prepare-layers/generate_crosswalk.py | 2 +- prepare-layers/make_arable_map.py | 21 +++- prepare-layers/make_diff_map.py | 115 +++++++++++++++---- utils/enumerate_habitats.py | 55 +++++++++ 7 files changed, 276 insertions(+), 81 deletions(-) create mode 100644 utils/enumerate_habitats.py diff --git a/aoh-calculator b/aoh-calculator index a920bbc..614d65a 160000 --- a/aoh-calculator +++ b/aoh-calculator @@ -1 +1 @@ -Subproject commit a920bbc95857efee9c06f691cd07fd098df89dc3 +Subproject commit 614d65a586815ca9021894287f217723d6102b8f diff --git a/deltap/global_code_residents_pixel_AE_128.py b/deltap/global_code_residents_pixel_AE_128.py index 573017d..18808a7 100644 --- a/deltap/global_code_residents_pixel_AE_128.py +++ b/deltap/global_code_residents_pixel_AE_128.py @@ -1,6 +1,7 @@ import argparse import math import os +import sys import types import geopandas as gpd @@ -67,9 +68,9 @@ def global_code_residents_pixel_ae( try: filtered_species_info = gpd.read_file(species_data_path) except: # pylint:disable=W0702 - quit(f"Failed to read {species_data_path}") + sys.exit(f"Failed to read {species_data_path}") taxid = filtered_species_info.id_no.values[0] - season = filtered_species_info.seasonal.values[0] + season = int(filtered_species_info.seasonal.values[0]) try: exp_val = float(exponent) @@ -80,20 +81,30 @@ def global_code_residents_pixel_ae( z_exponent_func_float = gen_gompertz z_exponent_func_raster = numpy_gompertz else: - quit(f"unrecognised exponent {exponent}") + sys.exit(f"unrecognised exponent {exponent}") match season: - case seasons.RESIDENT: + case 1: #seasons.RESIDENT: filename = f"{taxid}_{season}.tif" try: current = open_layer_as_float64(os.path.join(current_aohs_path, filename)) + except FileNotFoundError: + print(f"Failed to open current layer {os.path.join(current_aohs_path, filename)}") + sys.exit() + try: scenario = open_layer_as_float64(os.path.join(scenario_aohs_path, filename)) + except FileNotFoundError: + print(f"Failed to open scenario layer {os.path.join(scenario_aohs_path, filename)}") + sys.exit() + try: historic_AOH = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum() except FileNotFoundError as fnf: - quit(f"Failed to open {fnf.filename}") + print(f"Failed to open historic layer {os.path.join(historic_aohs_path, filename)}") + sys.exit() if historic_AOH == 0.0: - quit(f"Historic AoH for {taxid} is zero, aborting") + print(f"Historic AoH for {taxid} is zero, aborting") + sys.exit() layers = [current, scenario] union = RasterLayer.find_union(layers) @@ -112,16 +123,26 @@ def global_code_residents_pixel_ae( delta_p = RasterLayer.empty_raster_layer_like(new_p_layer, filename=os.path.join(output_folder, filename)) calc.save(delta_p) - case seasons.NONBREEDING: + case 3: #seasons.NONBREEDING: nonbreeding_filename = f"{taxid}_{seasons.NONBREEDING}.tif" breeding_filename = f"{taxid}_{seasons.BREEDING}.tif" - historic_AOH_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)).sum() - if historic_AOH_breeding == 0.0: - quit(f"Historic AoH breeding for {taxid} is zero, aborting") - historic_AOH_non_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)).sum() - if historic_AOH_non_breeding == 0.0: - quit(f"Historic AoH for non breeding {taxid} is zero, aborting") + try: + historic_AOH_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, 
breeding_filename)).sum() + if historic_AOH_breeding == 0.0: + print(f"Historic AoH breeding for {taxid} is zero, aborting") + sys.exit() + except FileNotFoundError: + print(f"Historic AoH for breeding {taxid} not found, aborting") + sys.exit() + try: + historic_AOH_non_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)).sum() + if historic_AOH_non_breeding == 0.0: + print(f"Historic AoH for non breeding {taxid} is zero, aborting") + sys.exit() + except FileNotFoundError: + print(f"Historic AoH for non breeding {taxid} not found, aborting") + sys.exit() if scenario_aohs_path != "nan": @@ -133,11 +154,24 @@ def global_code_residents_pixel_ae( try: current_breeding = open_layer_as_float64(os.path.join(current_aohs_path, breeding_filename)) + except FileNotFoundError as fnf: + print(f"Failed to open current breeding {os.path.join(current_aohs_path, breeding_filename)}") + sys.exit() + try: current_non_breeding = open_layer_as_float64(os.path.join(current_aohs_path, nonbreeding_filename)) + except FileNotFoundError as fnf: + print(f"Failed to open current non breeding {os.path.join(current_aohs_path, nonbreeding_filename)}") + sys.exit() + try: scenario_breeding = open_layer_as_float64(breeding_scenario_path) + except FileNotFoundError as fnf: + print(f"Failed to open scenario breeding {breeding_scenario_path}") + sys.exit() + try: scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) except FileNotFoundError as fnf: - quit(f"Failed to open {fnf.filename}") + print(f"Failed to open sceario non breeding{fnf.filename}") + sys.exit() layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding] union = RasterLayer.find_union(layers) @@ -165,10 +199,10 @@ def global_code_residents_pixel_ae( output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=args['output_path']) delta_p_layer.save(output) - case seasons.BREEDING: + case 2: #seasons.BREEDING: pass # covered by the nonbreeding case case _: - quit(f"Expected season for species {taxid}: {season}") + sys.exit(f"Unexpected season for species {taxid}: {season}") def main() -> None: parser = argparse.ArgumentParser() @@ -213,7 +247,7 @@ def main() -> None: args.species_data_path, args.current_path, args.scenario_path, - args.historic_path. 
+ args.historic_path, args.exponent, args.output_path, ) diff --git a/method.md b/method.md index b8f8584..32428c1 100644 --- a/method.md +++ b/method.md @@ -27,11 +27,8 @@ For use with the [shark pipeline](https://github.com/quantifyearth/shark), we ne (run (shell "mkdir -p /root")) (workdir "/root") (copy (src "requirements.txt") (dst "./")) - (run (network host) (shell "pip install --no-cache-dir -r requirements.txt")) - (copy (src "prepare-layers") (dst "./")) - (copy (src "prepare-species") (dst "./")) (copy (src "aoh-calculator") (dst "./")) - (copy (src "deltap") (dst "./")) + (run (network host) (shell "pip install --no-cache-dir -r requirements.txt")) ) ``` @@ -71,6 +68,21 @@ For querying the IUCN data held in the PostGIS database we use a seperate contai ## Fetching the required data +```shark-build:layer-prep +((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5) + (run (network host) (shell "apt-get update -qqy && apt-get -y install python3-pip libpq-dev git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*")) + (run (network host) (shell "pip install --upgrade pip")) + (run (network host) (shell "pip install 'numpy<2'")) + (run (network host) (shell "pip install gdal[numpy]==3.8.5")) + (run (shell "mkdir -p /root")) + (workdir "/root") + (copy (src "requirements.txt") (dst "./")) + (copy (src "aoh-calculator") (dst "./")) + (run (network host) (shell "pip install --no-cache-dir -r requirements.txt")) + (copy (src "prepare-layers") (dst "./")) +) +``` + To calculate the AoH we need various basemaps: - Habitat maps for four scenarios: @@ -103,7 +115,7 @@ reclaimer zenodo --zenodo_id 4058819 \ For LIFE the crosswalk table is generated using code by Daniele Baisero's [IUCN Modlib](https://gitlab.com/daniele.baisero/iucn-modlib/) package: -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./prepare-layers/generate_crosswalk.py --output /data/crosswalk.csv ``` @@ -111,7 +123,7 @@ The PNV map is only classified at Level 1 of the IUCN habitat codes, and so to m | The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped only at IUCN level 1, so to estimate species’ proportion of original AOH now remaining we could only use natural habitats mapped at level 1 and artificial habitats at level 2. -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./prepare-layers/make_current_map.py --jung /data/habitat/jung_l2_raw.tif \ --crosswalk /data/crosswalk.csv \ --output /data/habitat/current_raw.tif \ @@ -120,13 +132,13 @@ python3 ./prepare-layers/make_current_map.py --jung /data/habitat/jung_l2_raw.ti The habitat map by Jung et al is at 100m resolution in World Berhman projection, and for IUCN compatible AoH maps we use Molleide at 1KM resolution, so we use GDAL to do the resampling for this: -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/pnv_raw.tif \ --scale 0.016666666666667 \ --output /data/habitat_maps/pnv/ ``` -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/current_raw.tif \ --scale 0.016666666666667 \ --output /data/habitat_maps/current/ @@ -140,7 +152,7 @@ The definition of the restore layer from Section 5 of [Eyres et al](https://www. | In the restoration scenario all areas classified as arable or pasture were restored to their PNV. 
-```shark-run:aohbuilder +```shark-run:layer-prep python3 ./prepare-layers/make_restore_map.py --pnv /data/habitat/pnv_raw.tif \ --current /data/habitat/current_raw.tif \ --crosswalk /data/crosswalk.csv \ @@ -155,7 +167,7 @@ The definition of the arable layer from Section 5 of [Eyres et al](https://www.c | In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land. -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./prepare-layers/make_arable_map.py --current /data/habitat/current_raw.tif \ --crosswalk /data/crosswalk.csv \ --output /data/habitat/arable.tif @@ -169,7 +181,7 @@ python3 ./aoh-calculator/habitat_process.py --habitat /data/habitat/arable.tif \ For LIFE we need to know the actual area, not just pixel count. For this we generate a map that contains the area per pixel for a given latitude which is one pixel wide, and then we sweep that across for a given longitude. -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output /data/area-per-pixel.tif ``` @@ -177,24 +189,20 @@ python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output /da In the algorithm we use need to account for map projection distortions, so all values in the AoHs are based on the area per pixel. To get the final extinction risk values we must remove that scaling. To do that we generate a map of area difference from current for the given scenario. -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./prepare-layers/make_diff_map.py --current /data/habitat/current_raw.tif \ --scenario /data/habitat/restore.tif \ - --output /data/habitat/restore_diff_raw.tif - -gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 /data/habitat/restore_diff_raw.tif /data/habitat/restore_diff.tif - -gdal_calc -A /data/habitat/restore_diff.tif -B /data/area-per-pixel.tif --out /data/habitat/restore_diff_area.tif --calc="A*B" + --area /data/area-per-pixel.tif \ + --scale 0.016666666666667 \ + --output /data/habitat/restore_diff_area.tif ``` -```shark-run:aohbuilder +```shark-run:layer-prep python3 ./prepare-layers/make_diff_map.py --current /data/habitat/current_raw.tif \ --scenario /data/habitat/arable.tif \ - --output /data/habitat/arable_diff_raw.tif - -gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 /data/habitat/arable_diff_raw.tif /data/habitat/arable_diff.tif - -gdal_calc -A /data/habitat/arable_diff.tif -B /data/area-per-pixel.tif --out /data/habitat/arable_diff_area.tif --calc="A*B" + --area /data/area-per-pixel.tif \ + --scale 0.016666666666667 \ + --output /data/habitat/arable_diff_area.tif ``` ### Fetching the elevation map @@ -235,7 +243,7 @@ The reason for doing this primarly one of pipeline optimisation, though it also This step generates a single AoH raster for a single one of the above GeoJSON files. 
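
Conceptually each AoH raster is the species range masked by habitat suitability and the species' elevation limits, then weighted by pixel area. The following is a simplified sketch of that calculation, not the actual `aohcalc.py` implementation; all names are illustrative:

```python
import numpy as np

def aoh_sketch(range_mask, habitat_fraction, pixel_area, elev_min, elev_max, species_lo, species_hi):
    # A pixel contributes its habitat-weighted area when it lies inside the
    # range polygon and its elevation band overlaps the species' limits.
    in_elevation = (elev_max >= species_lo) & (elev_min <= species_hi)
    return range_mask * habitat_fraction * in_elevation * pixel_area
```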
-```shark-run:aohbuilder +```shark-run:aoh-calc python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/current/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ @@ -280,24 +288,42 @@ The results you then want will all be in: ## Calculating persistence maps + +```shark-build:deltap +((from aohbuilder) +((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5) + (run (network host) (shell "apt-get update -qqy && apt-get -y install python3-pip libpq-dev git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*")) + (run (network host) (shell "pip install --upgrade pip")) + (run (network host) (shell "pip install 'numpy<2'")) + (run (network host) (shell "pip install gdal[numpy]==3.8.5")) + (run (shell "mkdir -p /root")) + (workdir "/root") + (copy (src "requirements.txt") (dst "./")) + (copy (src "aoh-calculator") (dst "./")) + (run (network host) (shell "pip install --no-cache-dir -r requirements.txt")) + (copy (src "deltap") (dst "./")) +) +``` + For each species we use the AoH data to calculate the likelihood of extinction under two scenarios: restoration and conseravation. To do that we work out the delta_p value per species, and then sum together all those results per species into a single layer. -```shark-run:aohbuilder + +```shark-run:deltap python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \ - --current-path /data/aohs/current/ \ - --scenario-path /data/aohs/restore/ \ - --historic-path /data/aohs/pnv/ \ + --current_path /data/aohs/current/ \ + --scenario_path /data/aohs/restore/ \ + --historic_path /data/aohs/pnv/ \ --z 0.25 \ - --output /data/deltap/restore/ + --output_path /data/deltap/restore/ python3 ./utils/raster_sum.py --input /data/deltap/restore/ --output /data/deltap/restore_0.25.tif python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \ - --current-path /data/aohs/current/ \ - --scenario-path /data/aohs/arable/ \ - --historic-path /data/aohs/pnv/ \ + --current_path /data/aohs/current/ \ + --scenario_path /data/aohs/arable/ \ + --historic_path /data/aohs/pnv/ \ --z 0.25 \ - --output /data/deltap/arable/ + --output_path /data/deltap/arable/ python3 ./utils/raster_sum.py --input /data/deltap/arable/ --output /data/deltap/arable_0.25.tif ``` @@ -309,7 +335,7 @@ python3 ./utils/raster_sum.py --input /data/deltap/arable/ --output /data/deltap Finally, we need to scale the results for publication: -```shark-run:aohbuilder +```shark-run:deltap python3 ./deltap/delta_p_hectare.py --input /data/deltap/restore_0.25.tif \ --diffmap /data/habitat/restore_diff_area.tif \ --output /data/deltap/scaled_restore_0.25.tif diff --git a/prepare-layers/generate_crosswalk.py b/prepare-layers/generate_crosswalk.py index 3b7488f..e9fcfd9 100644 --- a/prepare-layers/generate_crosswalk.py +++ b/prepare-layers/generate_crosswalk.py @@ -13,7 +13,7 @@ "4", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7", "5", "5.1", "5.2", "5.3", "5.4", "5.5", "5.6", "5.7", "5.8", "5.9", "5.10", "5.11", "5.12", "5.13", "5.14", "5.15", "5.16", "5.17", "5.18", - "6" + "6", "7", "7.1", "7.2", "8", "8.1", "8.2", "8.3", "9", "9.1", "9.2", "9.3", "9.4", "9.5", "9.6", "9.7", "9.8", "9.9", "9.10", diff --git a/prepare-layers/make_arable_map.py b/prepare-layers/make_arable_map.py index 0204694..e90483a 100644 --- a/prepare-layers/make_arable_map.py +++ b/prepare-layers/make_arable_map.py @@ -10,9 +10,19 @@ from alive_progress import alive_bar from yirgacheffe.layers import RasterLayer -# From 
Eyres et al: In the conversion scenario all habitats currently mapped as natural or pasture were converted to arable land -IUCN_CODE_ARTIFICAL = [ - "14", "14.3", "14.4", "14.5", "14.6" +# From Eyres et al: +# All natural terrestrial habitats and non-urban artificial habitats +IUCN_CODE_NATURAL = [ + "1", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9", + "2", "2.1", "2.2", + "3", "3.1", "3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", + "4", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7", + "6", + "8", "8.1", "8.2", "8.3", + "14.1", "14.2", "14.3", "14.4", "14.6", # urban removed + #"16", # Not in crosswalk due to iucn_modlib + "17", + #"18", # Not in crosswalk due to iucn_modlib ] ARABLE = "14.1" @@ -37,13 +47,14 @@ def make_arable_map( with RasterLayer.layer_from_file(current_path) as current: crosswalk = load_crosswalk_table(crosswalk_path) - map_preserve_code = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_ARTIFICAL])) + map_replace_codes = list(set(list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_NATURAL])))) + print(map_replace_codes) # arable_code = crosswalk[ARABLE][0] arable_code = 1401 # This is a hack as Daniele's crosswalk has 14.1 mapped to both 1400 and 1401 and there's no logical way # to understand this calc = current.numpy_apply( - lambda a: np.where(np.isin(a, map_preserve_code), a, arable_code) + lambda a: np.where(np.isin(a, map_replace_codes), arable_code, a) ) with RasterLayer.empty_raster_layer_like( diff --git a/prepare-layers/make_diff_map.py b/prepare-layers/make_diff_map.py index 9e2fbf4..3e716ce 100644 --- a/prepare-layers/make_diff_map.py +++ b/prepare-layers/make_diff_map.py @@ -1,42 +1,86 @@ -import argparse +import argparse +import os +import shutil +import tempfile from typing import Dict, List, Optional from osgeo import gdal from alive_progress import alive_bar -from yirgacheffe.layers import RasterLayer +from yirgacheffe.layers import RasterLayer, UniformAreaLayer -def make_arable_map( +def make_diff_map( current_path: str, scenario_path: str, + area_path: str, + pixel_scale: float, + target_projection: Optional[str], output_path: str, concurrency: Optional[int], show_progress: bool, ) -> None: - with RasterLayer.layer_from_file(current_path) as current: - with RasterLayer.layer_from_file(scenario_path) as scenario: + with tempfile.TemporaryDirectory() as tmpdir: + raw_map_filename = os.path.join(tmpdir, "raw.tif") + with RasterLayer.layer_from_file(current_path) as current: + with RasterLayer.layer_from_file(scenario_path) as scenario: - layers = [current, scenario] - intersection = RasterLayer.find_intersection(layers) - for layer in layers: - layer.set_window_for_intersection(intersection) + layers = [current, scenario] + intersection = RasterLayer.find_intersection(layers) + for layer in layers: + layer.set_window_for_intersection(intersection) - calc = current.numpy_apply(lambda a, b: a != b) + calc = current.numpy_apply(lambda a, b: a != b, scenario) - with RasterLayer.empty_raster_layer_like( - current, - filename=output_path, - datatype=gdal.GDT_Float32, - threads=16 - ) as result: - if show_progress: - with alive_bar(manual=True) as bar: - calc.parallel_save(result, callback=bar, parallelism=concurrency) - else: - calc.parallel_save(result, parallelism=concurrency) + with RasterLayer.empty_raster_layer_like( + current, + filename=raw_map_filename, + datatype=gdal.GDT_Float32, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, 
callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + rescaled_map_filename = os.path.join(tmpdir, "rescaled.tif") + gdal.Warp(rescaled_map_filename, raw_map_filename, options=gdal.WarpOptions( + creationOptions=['COMPRESS=LZW', 'NUM_THREADS=16'], + multithread=True, + dstSRS=target_projection, + outputType=gdal.GDT_Float32, + xRes=pixel_scale, + yRes=0.0 - pixel_scale, + resampleAlg="average", + workingType=gdal.GDT_Float32 + )) + + with UniformAreaLayer.layer_from_file(area_path) as area_map: + with RasterLayer.layer_from_file(rescaled_map_filename) as diff_map: + layers = [area_map, diff_map] + intersection = RasterLayer.find_intersection(layers) + for layer in layers: + layer.set_window_for_intersection(intersection) + + area_adjusted_map_filename = os.path.join(tmpdir, "final.tif") + calc = area_map * diff_map + + with RasterLayer.empty_raster_layer_like( + diff_map, + filename=area_adjusted_map_filename, + datatype=gdal.GDT_Float32, + threads=16 + ) as result: + if show_progress: + with alive_bar(manual=True) as bar: + calc.parallel_save(result, callback=bar, parallelism=concurrency) + else: + calc.parallel_save(result, parallelism=concurrency) + + shutil.move(area_adjusted_map_filename, output_path) def main() -> None: - parser = argparse.ArgumentParser(description="Generate the arable scenario map.") + parser = argparse.ArgumentParser(description="Generate an area difference map.") parser.add_argument( '--current', type=str, @@ -51,6 +95,28 @@ def main() -> None: required=True, dest='scenario_path', ) + parser.add_argument( + '--area', + type=str, + help='Path of the area per pixel map', + required=True, + dest='area_path', + ) + parser.add_argument( + "--scale", + type=float, + required=True, + dest="pixel_scale", + help="Output pixel scale value." 
+ ) + parser.add_argument( + '--projection', + type=str, + help="Target projection", + required=False, + dest="target_projection", + default=None + ) parser.add_argument( '--output', type=str, @@ -76,9 +142,12 @@ def main() -> None: ) args = parser.parse_args() - make_arable_map( + make_diff_map( args.current_path, args.scenario_path, + args.area_path, + args.pixel_scale, + args.target_projection, args.results_path, args.concurrency, args.show_progress, diff --git a/utils/enumerate_habitats.py b/utils/enumerate_habitats.py new file mode 100644 index 0000000..599aaf5 --- /dev/null +++ b/utils/enumerate_habitats.py @@ -0,0 +1,55 @@ +import argparse +import logging +from functools import partial +from multiprocessing import Pool, cpu_count +from typing import Set + +from yirgacheffe.layers import RasterLayer # type: ignore + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s') + +BLOCKSIZE = 512 + +def enumerate_subset( + habitat_path: str, + offset: int, +) -> Set[int]: + with RasterLayer.layer_from_file(habitat_path) as habitat_map: + blocksize = min(BLOCKSIZE, habitat_map.window.ysize - offset) + data = habitat_map.read_array(0, offset, habitat_map.window.xsize, blocksize) + values = data.flatten() + res = set(values) + return res + +def enumerate_terrain_types( + habitat_path: str +) -> Set[int]: + with RasterLayer.layer_from_file(habitat_path) as habitat_map: + ysize = habitat_map.window.ysize + blocks = range(0, ysize, BLOCKSIZE) + logger.info("Enumerating habitat classes in raster...") + with Pool(processes=int(cpu_count() / 2)) as pool: + sets = pool.map(partial(enumerate_subset, habitat_path), blocks) + superset = set() + for s in sets: + superset.update(s) + logger.info(superset) + +def main() -> None: + parser = argparse.ArgumentParser(description="Downsample habitat map to raster per terrain type.") + parser.add_argument( + '--map', + type=str, + help="Initial habitat.", + required=True, + dest="habitat_path" + ) + args = parser.parse_args() + + enumerate_terrain_types( + args.habitat_path, + ) + +if __name__ == "__main__": + main() From e76b17ece685bdc623ab2204c81f92936919612d Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 24 Sep 2024 12:36:15 +0100 Subject: [PATCH 12/36] Misc fixes --- deltap/global_code_residents_pixel_AE_128.py | 2 +- method.md | 8 ++-- utils/raster_sum.py | 45 ++++++++++---------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/deltap/global_code_residents_pixel_AE_128.py b/deltap/global_code_residents_pixel_AE_128.py index 18808a7..1da4435 100644 --- a/deltap/global_code_residents_pixel_AE_128.py +++ b/deltap/global_code_residents_pixel_AE_128.py @@ -196,7 +196,7 @@ def global_code_residents_pixel_ae( delta_p_layer = new_p_layer - ConstantLayer(old_persistence) - output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=args['output_path']) + output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=os.path.join(output_folder, nonbreeding_filename)) delta_p_layer.save(output) case 2: #seasons.BREEDING: diff --git a/method.md b/method.md index 32428c1..a669ad8 100644 --- a/method.md +++ b/method.md @@ -243,7 +243,7 @@ The reason for doing this primarly one of pipeline optimisation, though it also This step generates a single AoH raster for a single one of the above GeoJSON files. 
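For context on the `raster_sum.py` changes later in this patch: the script folds the per-species delta-p rasters into a single global layer, treating each species' nodata pixels as a zero contribution. The accumulation rule is essentially the following sketch (the real script additionally unions raster windows and parallelises the work across a queue of worker processes):

```python
import numpy as np

def accumulate(total: np.ndarray, species_delta_p: np.ndarray) -> np.ndarray:
    # NaN marks pixels outside a species' range; convert those to zero so
    # one species' nodata does not poison the running global sum.
    return total + np.nan_to_num(species_delta_p, copy=False, nan=0.0)
```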
-```shark-run:aoh-calc +```shark-run:aohbuilder python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/current/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ @@ -290,7 +290,6 @@ The results you then want will all be in: ```shark-build:deltap -((from aohbuilder) ((from ghcr.io/osgeo/gdal:ubuntu-small-3.8.5) (run (network host) (shell "apt-get update -qqy && apt-get -y install python3-pip libpq-dev git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*")) (run (network host) (shell "pip install --upgrade pip")) @@ -302,6 +301,7 @@ The results you then want will all be in: (copy (src "aoh-calculator") (dst "./")) (run (network host) (shell "pip install --no-cache-dir -r requirements.txt")) (copy (src "deltap") (dst "./")) + (copy (src "utils") (dst "./")) ) ``` @@ -316,7 +316,7 @@ python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/speci --z 0.25 \ --output_path /data/deltap/restore/ -python3 ./utils/raster_sum.py --input /data/deltap/restore/ --output /data/deltap/restore_0.25.tif +python3 ./utils/raster_sum.py --rasters_directory /data/deltap/restore/ --output /data/deltap/restore_0.25.tif python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \ --current_path /data/aohs/current/ \ @@ -325,7 +325,7 @@ python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/speci --z 0.25 \ --output_path /data/deltap/arable/ -python3 ./utils/raster_sum.py --input /data/deltap/arable/ --output /data/deltap/arable_0.25.tif +python3 ./utils/raster_sum.py --rasters_directory /data/deltap/arable/ --output /data/deltap/arable_0.25.tif ``` ```shark-publish2 diff --git a/utils/raster_sum.py b/utils/raster_sum.py index 199964e..ec43c7a 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -24,28 +24,26 @@ def worker( if path is None: break - partial_raster = RasterLayer.layer_from_file(path) - - if merged_result is None: - merged_result = RasterLayer.empty_raster_layer_like(partial_raster, datatype=gdal.GDT_Float64) - cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) - cleaned_raster.save(merged_result) - else: - merged_result.reset_window() - - union = YirgacheffeLayer.find_union([merged_result, partial_raster]) - merged_result.set_window_for_union(union) - partial_raster.set_window_for_union(union) - - calc = merged_result + (partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0))) - temp = RasterLayer.empty_raster_layer_like(merged_result, datatype=gdal.GDT_Float64) - calc.save(temp) - merged_result = temp - - final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) - assert merged_result is not None - merged_result.save(final) - del merged_result + with RasterLayer.layer_from_file(path) as partial_raster: + if merged_result is None: + merged_result = RasterLayer.empty_raster_layer_like(partial_raster, datatype=gdal.GDT_Float64) + cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + cleaned_raster.save(merged_result) + else: + merged_result.reset_window() + + union = YirgacheffeLayer.find_union([merged_result, partial_raster]) + merged_result.set_window_for_union(union) + partial_raster.set_window_for_union(union) + + calc = merged_result + (partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0))) + temp = RasterLayer.empty_raster_layer_like(merged_result, datatype=gdal.GDT_Float64) 
+ calc.save(temp) + merged_result = temp + + if merged_result: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) def build_k( images_dir: str, @@ -54,6 +52,9 @@ def build_k( ) -> None: files = [os.path.join(images_dir, x) for x in glob.glob("*.tif", root_dir=images_dir)] + if not files: + print(f"No files in {images_dir}, aborting", file=sys.stderr) + sys.exit(-1) with tempfile.TemporaryDirectory() as tempdir: with Manager() as manager: From cd6db956de8361e3a077bb142c8bc4e29b994916 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 30 Sep 2024 17:07:12 +0100 Subject: [PATCH 13/36] Updated to generate scaled delta_p maps for publication --- deltap/delta_p_scaled_area.py | 44 +++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py index 04b7e3b..ed5e1ad 100644 --- a/deltap/delta_p_scaled_area.py +++ b/deltap/delta_p_scaled_area.py @@ -1,4 +1,6 @@ import argparse +import os +from glob import glob import numpy as np from yirgacheffe.layers import RasterLayer @@ -10,28 +12,46 @@ def delta_p_scaled_area( diff_area_map_path: str, output_path: str, ): - with RasterLayer.layer_from_file(diff_area_map_path) as area_restore: - with RasterLayer.layer_from_file(input_path) as inlayer: + per_taxa = [ + RasterLayer.layer_from_file(os.path.join(input_path, x)) + for x in sorted(glob("*.tif", root_dir=input_path)) + ] + area_restore = RasterLayer.layer_from_file(diff_area_map_path) - intersection = RasterLayer.find_intersection([area_restore, inlayer]) - inlayer.set_window_for_intersection(intersection) - area_restore.set_window_for_intersection(intersection) + for layer in per_taxa: + layer.set_window_for_union(area_restore.area) - with RasterLayer.empty_raster_layer_like(inlayer, filename=output_path, nodata=float('nan')) as result: + area_restore_filter = area_restore.numpy_apply(lambda c: np.where(c < SCALE, float('nan'), c)) / SCALE - area_restore_filter = area_restore.numpy_apply(lambda c: np.where(c < SCALE, 0, c)) / SCALE - filtered_layer = inlayer.numpy_apply(lambda il, af: np.where(af != 0, il, 0), area_restore_filter) - scaled_filtered_layer = filtered_layer / area_restore_filter - scaled_filtered_layer.save(result) + dirname, basename = os.path.split(output_path) + + per_taxa_path = os.path.join(dirname, f"per_taxa_{basename}") + with RasterLayer.empty_raster_layer_like(area_restore, filename=per_taxa_path, nodata=float('nan'), bands=len(per_taxa)) as result: + for idx in range(len(per_taxa)): + inlayer = per_taxa[idx] + _, name = os.path.split(inlayer.name) + result._dataset.GetRasterBand(idx+1).SetDescription(name[:-4]) + filtered_layer = inlayer.numpy_apply(lambda il, af: np.where(af != 0, il, float('nan')), area_restore_filter) + scaled_filtered_layer = (filtered_layer / area_restore_filter) * -1.0 + scaled_filtered_layer.parallel_save(result, band=idx + 1) + + summed_output_path = os.path.join(dirname, f"summed_{basename}") + with RasterLayer.empty_raster_layer_like(area_restore, filename=summed_output_path, nodata=float('nan')) as result: + summed_layer = per_taxa[0] + for layer in per_taxa[1:]: + summed_layer = summed_layer + layer + filtered_layer = summed_layer.numpy_apply(lambda il, af: np.where(af != 0, il, float('nan')), area_restore_filter) + scaled_filtered_layer = (filtered_layer / area_restore_filter) * -1.0 + scaled_filtered_layer.parallel_save(result) def main() -> None: - parser = 
argparse.ArgumentParser(description="Scale final results.") + parser = argparse.ArgumentParser(description="Scale final .") parser.add_argument( '--input', type=str, help='Path of map of extinction risk', required=True, - dest='current_path', + dest='input_path', ) parser.add_argument( '--diffmap', From 6e3bfc33ffefd028e1e7881f2bc8bc265a2c44a8 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 2 Oct 2024 13:59:57 +0100 Subject: [PATCH 14/36] Significantly speed up the database extraction script --- method.md | 2 +- prepare-species/cleaning.py | 6 +- prepare-species/extract_species_psql.py | 117 ++++++++++++++++++------ 3 files changed, 91 insertions(+), 34 deletions(-) diff --git a/method.md b/method.md index a669ad8..271d782 100644 --- a/method.md +++ b/method.md @@ -1,5 +1,5 @@ --- -path: /root +path: /roo --- # How to run the pipeline for LIFE diff --git a/prepare-species/cleaning.py b/prepare-species/cleaning.py index 22d7f40..0d1d8fb 100644 --- a/prepare-species/cleaning.py +++ b/prepare-species/cleaning.py @@ -25,9 +25,9 @@ def tidy_data(row: pd.Series) -> pd.Series: # Small difference (<50m) between lower and upper elevation elevation_diff = row.elevation_upper - row.elevation_lower - if elevation_diff < 50.0: - spare = 50.0 - elevation_diff - adjust = math.ceil(spare / 2.0) + if elevation_diff < 50: + spare = 50 - elevation_diff + adjust = math.ceil(spare / 2) row.elevation_lower -= adjust row.elevation_upper += adjust diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py index 5c6e473..be8c79a 100644 --- a/prepare-species/extract_species_psql.py +++ b/prepare-species/extract_species_psql.py @@ -1,11 +1,15 @@ import argparse import os -from typing import Optional +from functools import partial +from multiprocessing import Pool +from typing import Optional, Tuple # import pyshark # pylint: disable=W0611 import geopandas as gpd import pyproj -from sqlalchemy import create_engine, text +import psycopg2 +from postgis.psycopg import register +from shapely import from_wkb from cleaning import tidy_data @@ -15,17 +19,17 @@ 3: "NONBREEDING", } -STATEMENT = """ +MAIN_STATEMENT = """ WITH habitat_seasons AS ( SELECT assessment_habitats.assessment_id, assessment_habitats.habitat_id, CASE WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Resident' THEN 1 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Breeding%' THEN 2 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Non%Breed%' THEN 3 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Pass%' THEN 4 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE '%un%n%' THEN 1 -- capture 'uncertain' and 'unknown' as resident + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Breeding%%' THEN 2 + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Non%%Bree%%' THEN 3 + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Pass%%' THEN 4 + WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE '%%un%%n%%' THEN 1 -- capture 'uncertain' and 'unknown' as resident ELSE 1 END AS seasonal FROM @@ -34,6 +38,11 @@ LEFT JOIN assessment_habitats ON assessment_habitats.assessment_id = assessments.id WHERE assessments.latest = true + AND ( + -- LIFE ignores marginal suitability + assessment_habitats.supplementary_fields->>'suitability' IS NULL + OR assessment_habitats.supplementary_fields->>'suitability' IN ('Suitable', 'Unknown') + ) ), unique_seasons AS ( SELECT DISTINCT ON 
(taxons.scientific_name, habitat_seasons.seasonal) @@ -43,7 +52,7 @@ assessment_ranges.origin, STRING_AGG(habitat_lookup.code, '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS full_habitat_code, STRING_AGG(system_lookup.description->>'en', '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS systems, - (ST_COLLECT(assessment_ranges.geom::geometry) OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id))::geography AS geometry, + STRING_AGG(assessment_ranges.id::text, '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS ranges, (assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower, (assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper, ROW_NUMBER() OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessments.id, assessment_ranges.id) AS rn @@ -61,7 +70,7 @@ assessments.latest = true AND taxons.class_id = 22672813 -- AVES AND habitat_seasons.habitat_id is not null - AND assessment_ranges.presence IN {presence} + AND assessment_ranges.presence IN %s AND assessment_ranges.origin IN (1, 2, 6) AND assessment_ranges.seasonal IN (1, 2, 3) AND red_list_category_lookup.code != 'EX' @@ -72,20 +81,28 @@ elevation_lower, elevation_upper, full_habitat_code, - geometry + ranges FROM unique_seasons WHERE rn = 1 -- the below queries must happen on the aggregate data - AND full_habitat_code NOT LIKE '7%' - AND full_habitat_code NOT LIKE '%|7%' - AND systems NOT LIKE '%Marine%' -LIMIT 50 + AND full_habitat_code NOT LIKE '7%%' + AND full_habitat_code NOT LIKE '%%|7%%' + AND systems NOT LIKE '%%Marine%%' """ -CURRENT_STATEMENT = STATEMENT.format(presence="(1, 2)") -HISTORIC_STATEMENT = STATEMENT.format(presence="(1, 2, 4, 5)") +GEOMETRY_STATEMENT = """ +SELECT + ST_UNION(assessment_ranges.geom::geometry) AS geometry +FROM + assessment_ranges +WHERE + assessment_ranges.id IN %s + AND assessment_ranges.presence IN %s + AND assessment_ranges.origin IN (1, 2, 6) + AND assessment_ranges.seasonal IN (1, 2, 3) +""" DB_HOST = os.getenv("DB_HOST") DB_PORT = os.getenv("DB_PORT", "5432") @@ -96,26 +113,66 @@ f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" ) -def extract_data_per_species( +def process_row( output_directory_path: str, - target_projection: Optional[str], + presence: Tuple[int], + row: Tuple, ) -> None: - # The geometry is in CRS 4326, but the AoH work is done in World_Behrmann, aka Projected CRS: ESRI:54017 src_crs = pyproj.CRS.from_epsg(4326) target_crs = src_crs #pyproj.CRS.from_string(target_projection) - engine = create_engine(DB_CONFIG, echo=False) - for era, statement in ("current", CURRENT_STATEMENT), ("historic", HISTORIC_STATEMENT): - os.makedirs(os.path.join(output_directory_path, era), exist_ok=True) - dfi = gpd.read_postgis(text(statement), con=engine, geom_col="geometry", chunksize=1024) - for df in dfi: - for _, raw in df.iterrows(): - row = tidy_data(raw) - output_path = os.path.join(output_directory_path, era, f"{row.id_no}_{SEASON_NAME[row.seasonal]}.geojson") - res = gpd.GeoDataFrame(row.to_frame().transpose(), crs=src_crs, geometry="geometry") - res_projected = res.to_crs(target_crs) - res_projected.to_file(output_path, driver="GeoJSON") + connection = psycopg2.connect(DB_CONFIG) + register(connection) + curs = connection.cursor() 
+ + id_no, seasonal, elevation_lower, elevation_upper, full_habitat_code, range_ids = row + + cleaned_range_ids = set([int(x) for x in range_ids.split('|')]) + + curs.execute(GEOMETRY_STATEMENT, (tuple(cleaned_range_ids), presence)) + geometry = curs.fetchall() + if len(geometry) == 0: + return + elif len(geometry) > 1: + raise ValueError("Expected just a single geometry value") + + x = (geometry[0][0]) + x = from_wkb(x.to_ewkb()) + + gdf = gpd.GeoDataFrame( + [[id_no, seasonal, int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code]], + columns=["id_no", "seasonal", "elevation_lower", "elevation_upper", "full_habitat_code"], + crs='epsg:4326', geometry=[x]) + graw = gdf.loc[0].copy() + + grow = tidy_data(graw) + output_path = os.path.join(output_directory_path, f"{grow.id_no}_{SEASON_NAME[grow.seasonal]}.geojson") + res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry") + res_projected = res.to_crs(target_crs) + res_projected.to_file(output_path, driver="GeoJSON") + +def extract_data_per_species( + output_directory_path: str, + target_projection: Optional[str], +) -> None: + + connection = psycopg2.connect(DB_CONFIG) + curs = connection.cursor() + + # engine = create_engine(DB_CONFIG, echo=False) + for era, presence in [("current", (1, 2)), ("historic", (1, 2, 4, 5))]: + era_output_directory_path = os.path.join(output_directory_path, era) + os.makedirs(os.path.join(era_output_directory_path, era), exist_ok=True) + + curs.execute(MAIN_STATEMENT, (presence,)) + # This can be quite big (tens of thousands), but in modern computer term is quite small + # and I need to make a follow on DB query per result. + results = curs.fetchall() + + # The limiting amount here is how many concurrent connections the database can take + with Pool(processes=20) as pool: + pool.map(partial(process_row, era_output_directory_path, presence), results) def main() -> None: parser = argparse.ArgumentParser(description="Process agregate species data to per-species-file.") From 93571e8dc1a1730ac7834753a3705836d596a9a7 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 3 Oct 2024 10:44:33 +0100 Subject: [PATCH 15/36] Reworked species database collection. 
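The previous version drove everything off one large CTE-based query. This rework issues a cheap query for the latest assessments per class, then makes the habitat and geometry lookups per assessment from worker processes, roughly this pattern (names are illustrative; each worker opens its own psycopg2 connection, since connections cannot be shared across processes, and the pool is sized to what the database will tolerate):

```python
from functools import partial
from multiprocessing import Pool

def process_assessment(output_dir: str, assessment_id: int) -> None:
    # Hypothetical worker: reconnect to the database, then run the habitat
    # and geometry queries for this single assessment and write the GeoJSON.
    print(output_dir, assessment_id)

def fan_out(assessment_ids: list, output_dir: str) -> None:
    with Pool(processes=20) as pool:
        pool.map(partial(process_assessment, output_dir), assessment_ids)
```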
--- prepare-species/extract_species_psql.py | 256 ++++++++++++++---------- 1 file changed, 151 insertions(+), 105 deletions(-) diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py index be8c79a..5b6bc74 100644 --- a/prepare-species/extract_species_psql.py +++ b/prepare-species/extract_species_psql.py @@ -1,4 +1,5 @@ import argparse +import logging import os from functools import partial from multiprocessing import Pool @@ -8,11 +9,13 @@ import geopandas as gpd import pyproj import psycopg2 +import shapely from postgis.psycopg import register -from shapely import from_wkb from cleaning import tidy_data +logger = logging.getLogger(__name__) + SEASON_NAME = { 1: "RESIDENT", 2: "BREEDING", @@ -20,85 +23,50 @@ } MAIN_STATEMENT = """ -WITH habitat_seasons AS ( - SELECT - assessment_habitats.assessment_id, - assessment_habitats.habitat_id, - CASE - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Resident' THEN 1 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Breeding%%' THEN 2 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Non%%Bree%%' THEN 3 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE 'Pass%%' THEN 4 - WHEN (assessment_habitats.supplementary_fields->>'season') ILIKE '%%un%%n%%' THEN 1 -- capture 'uncertain' and 'unknown' as resident - ELSE 1 - END AS seasonal - FROM - public.assessments - LEFT JOIN taxons ON taxons.id = assessments.taxon_id - LEFT JOIN assessment_habitats ON assessment_habitats.assessment_id = assessments.id - WHERE - assessments.latest = true - AND ( - -- LIFE ignores marginal suitability - assessment_habitats.supplementary_fields->>'suitability' IS NULL - OR assessment_habitats.supplementary_fields->>'suitability' IN ('Suitable', 'Unknown') - ) -), -unique_seasons AS ( - SELECT DISTINCT ON (taxons.scientific_name, habitat_seasons.seasonal) - assessments.sis_taxon_id as id_no, - assessment_ranges.seasonal, - assessment_ranges.presence, - assessment_ranges.origin, - STRING_AGG(habitat_lookup.code, '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS full_habitat_code, - STRING_AGG(system_lookup.description->>'en', '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS systems, - STRING_AGG(assessment_ranges.id::text, '|') OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessment_ranges.id) AS ranges, - (assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower, - (assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper, - ROW_NUMBER() OVER (PARTITION BY taxons.scientific_name, habitat_seasons.seasonal ORDER BY assessments.id, assessment_ranges.id) AS rn - FROM - assessments - LEFT JOIN taxons ON taxons.id = assessments.taxon_id - LEFT JOIN assessment_ranges ON assessment_ranges.assessment_id = assessments.id - LEFT JOIN habitat_seasons ON habitat_seasons.assessment_id = assessments.id AND habitat_seasons.seasonal = assessment_ranges.seasonal - LEFT JOIN assessment_systems ON assessment_systems.assessment_id = assessments.id - LEFT JOIN system_lookup ON assessment_systems.system_lookup_id = system_lookup.id - LEFT JOIN habitat_lookup ON habitat_lookup.id = habitat_seasons.habitat_id - LEFT JOIN assessment_supplementary_infos ON assessment_supplementary_infos.assessment_id = assessments.id - LEFT JOIN red_list_category_lookup ON 
red_list_category_lookup.id = assessments.red_list_category_id - WHERE - assessments.latest = true - AND taxons.class_id = 22672813 -- AVES - AND habitat_seasons.habitat_id is not null - AND assessment_ranges.presence IN %s - AND assessment_ranges.origin IN (1, 2, 6) - AND assessment_ranges.seasonal IN (1, 2, 3) - AND red_list_category_lookup.code != 'EX' - ) SELECT - id_no, - seasonal, - elevation_lower, - elevation_upper, - full_habitat_code, - ranges + assessments.sis_taxon_id as id_no, + assessments.id as assessment_id, + (assessment_supplementary_infos.supplementary_fields->>'ElevationLower.limit')::numeric AS elevation_lower, + (assessment_supplementary_infos.supplementary_fields->>'ElevationUpper.limit')::numeric AS elevation_upper FROM - unique_seasons + assessments + LEFT JOIN taxons ON taxons.id = assessments.taxon_id + LEFT JOIN assessment_supplementary_infos ON assessment_supplementary_infos.assessment_id = assessments.id + LEFT JOIN red_list_category_lookup ON red_list_category_lookup.id = assessments.red_list_category_id WHERE - rn = 1 - -- the below queries must happen on the aggregate data - AND full_habitat_code NOT LIKE '7%%' - AND full_habitat_code NOT LIKE '%%|7%%' - AND systems NOT LIKE '%%Marine%%' + assessments.latest = true + AND taxons.class_name = %s + AND red_list_category_lookup.code NOT IN ('DD', 'NE', 'EX') +""" + +HABITATS_STATEMENT = """ +SELECT + STRING_AGG(habitat_lookup.code, '|') AS full_habitat_code, + STRING_AGG(system_lookup.description->>'en', '|') AS systems +FROM + assessments + LEFT JOIN assessment_habitats ON assessment_habitats.assessment_id = assessments.id + LEFT JOIN habitat_lookup on habitat_lookup.id = assessment_habitats.habitat_id + LEFT JOIN assessment_systems ON assessment_systems.assessment_id = assessments.id + LEFT JOIN system_lookup ON assessment_systems.system_lookup_id = system_lookup.id +WHERE + assessments.id = %s + AND ( + -- LIFE ignores marginal suitability, and ignores majorImportance + assessment_habitats.supplementary_fields->>'suitability' IS NULL + OR assessment_habitats.supplementary_fields->>'suitability' IN ('Suitable', 'Unknown') + ) """ GEOMETRY_STATEMENT = """ SELECT - ST_UNION(assessment_ranges.geom::geometry) AS geometry + assessment_ranges.seasonal, + ST_UNION(assessment_ranges.geom::geometry) OVER (PARTITION BY assessment_ranges.seasonal) AS geometry FROM - assessment_ranges + assessments + LEFT JOIN assessment_ranges On assessment_ranges.assessment_id = assessments.id WHERE - assessment_ranges.id IN %s + assessments.id = %s AND assessment_ranges.presence IN %s AND assessment_ranges.origin IN (1, 2, 6) AND assessment_ranges.seasonal IN (1, 2, 3) @@ -113,44 +81,123 @@ f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" ) +def tidy_reproject_save( + gdf: gpd.GeoDataFrame, + output_directory_path: str +) -> None: + # The geometry is in CRS 4326, but the AoH work is done in World_Behrmann, aka Projected CRS: ESRI:54017 + src_crs = pyproj.CRS.from_epsg(4326) + target_crs = src_crs #pyproj.CRS.from_string(target_projection) + + graw = gdf.loc[0].copy() + grow = tidy_data(graw) + os.makedirs(output_directory_path, exist_ok=True) + output_path = os.path.join(output_directory_path, f"{grow.id_no}_{grow.season}.geojson") + res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry") + res_projected = res.to_crs(target_crs) + res_projected.to_file(output_path, driver="GeoJSON") + + def process_row( output_directory_path: str, presence: Tuple[int], row: Tuple, ) -> None: - # The 
geometry is in CRS 4326, but the AoH work is done in World_Behrmann, aka Projected CRS: ESRI:54017 - src_crs = pyproj.CRS.from_epsg(4326) - target_crs = src_crs #pyproj.CRS.from_string(target_projection) connection = psycopg2.connect(DB_CONFIG) register(connection) - curs = connection.cursor() + cursor = connection.cursor() + - id_no, seasonal, elevation_lower, elevation_upper, full_habitat_code, range_ids = row + id_no, assessment_id, elevation_lower, elevation_upper = row - cleaned_range_ids = set([int(x) for x in range_ids.split('|')]) + cursor.execute(HABITATS_STATEMENT, (assessment_id,)) + habitats = cursor.fetchall() - curs.execute(GEOMETRY_STATEMENT, (tuple(cleaned_range_ids), presence)) - geometry = curs.fetchall() - if len(geometry) == 0: + if len(habitats) == 0: + # No matching habitats return - elif len(geometry) > 1: - raise ValueError("Expected just a single geometry value") + elif len(habitats) > 1: + raise ValueError("expected just one habitat value") - x = (geometry[0][0]) - x = from_wkb(x.to_ewkb()) + # Clean up habitats to ensure they're unique (the system agg in the SQL statement might duplicate them) + raw_habitats, systems = habitats[0] + + if systems is None: + logging.warning("Skipping %s: no systems in DB", id_no) + return + if "Marine" in systems: + logging.info("Skipping %s: marine in systems", id_no) + return + + if raw_habitats is None: + logging.warning("Skipping %s: no habitats in DB", id_no) + return + habitats = list(set([x for x in raw_habitats.split('|')])) + if len(habitats) == 0: + logging.info("Skipping %s: No habitats", id_no) + return + if any([x.startswith('7') for x in habitats]): + logging.info("Skipping %s: Habitat 7 in habitat list", id_no) + return + + full_habitat_code = '|'.join(habitats) + + cursor.execute(GEOMETRY_STATEMENT, (assessment_id, presence)) + geometries_data = cursor.fetchall() + if len(geometries_data) == 0: + logging.info("Skipping %s: no habitats", id_no) + return + geometries = {} + for season, geometry in geometries_data: + geometries[season] = shapely.normalize(shapely.from_wkb(geometry.to_ewkb())) + + seasons = list(geometries.keys()) + if seasons == [1]: + # Resident only + gdf = gpd.GeoDataFrame( + [[id_no, SEASON_NAME[1], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code, geometries[1]]], + columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], + crs='epsg:4326' + ) + tidy_reproject_save(gdf, output_directory_path) + else: + # Breeding and non-breeding + if 1 in seasons and 2 in seasons: + season_2 = shapely.union(geometries[2], geometries[1]) + elif 2 in seasons: + season_2 = geometries[2] + elif 1 in seasons: + season_2 = geometries[1] + else: + logging.info("Skipping %s: no geometries for breeding", id_no) + return + + if 1 in seasons and 3 in seasons: + season_3 = shapely.union(geometries[3], geometries[1]) + elif 3 in seasons: + season_3 = geometries[3] + elif 1 in seasons: + season_3 = geometries[1] + else: + logging.info("Skipping %s: no geometries for non-breeding", id_no) + return + + gdf = gpd.GeoDataFrame( + [[id_no, SEASON_NAME[2], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code, season_2]], + columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], + crs='epsg:4326' + ) + tidy_reproject_save(gdf, output_directory_path) + + gdf = gpd.GeoDataFrame( + [[id_no, SEASON_NAME[3], 
int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code, season_3]], + columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], + crs='epsg:4326', + ) + tidy_reproject_save(gdf, output_directory_path) - gdf = gpd.GeoDataFrame( - [[id_no, seasonal, int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code]], - columns=["id_no", "seasonal", "elevation_lower", "elevation_upper", "full_habitat_code"], - crs='epsg:4326', geometry=[x]) - graw = gdf.loc[0].copy() - grow = tidy_data(graw) - output_path = os.path.join(output_directory_path, f"{grow.id_no}_{SEASON_NAME[grow.seasonal]}.geojson") - res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry") - res_projected = res.to_crs(target_crs) - res_projected.to_file(output_path, driver="GeoJSON") def extract_data_per_species( output_directory_path: str, @@ -158,21 +205,20 @@ def extract_data_per_species( ) -> None: connection = psycopg2.connect(DB_CONFIG) - curs = connection.cursor() + cursor = connection.cursor() - # engine = create_engine(DB_CONFIG, echo=False) for era, presence in [("current", (1, 2)), ("historic", (1, 2, 4, 5))]: - era_output_directory_path = os.path.join(output_directory_path, era) - os.makedirs(os.path.join(era_output_directory_path, era), exist_ok=True) + for classname in ['AMPHIBIA', 'AVES', 'MAMMALIA', 'REPTILIA']: + era_output_directory_path = os.path.join(output_directory_path, era, classname) - curs.execute(MAIN_STATEMENT, (presence,)) - # This can be quite big (tens of thousands), but in modern computer term is quite small - # and I need to make a follow on DB query per result. - results = curs.fetchall() + cursor.execute(MAIN_STATEMENT, (classname,)) + # This can be quite big (tens of thousands), but in modern computer term is quite small + # and I need to make a follow on DB query per result. + results = cursor.fetchall() - # The limiting amount here is how many concurrent connections the database can take - with Pool(processes=20) as pool: - pool.map(partial(process_row, era_output_directory_path, presence), results) + # The limiting amount here is how many concurrent connections the database can take + with Pool(processes=20) as pool: + pool.map(partial(process_row, era_output_directory_path, presence), results) def main() -> None: parser = argparse.ArgumentParser(description="Process agregate species data to per-species-file.") From de1e90501b1931e15e4d9f2a9e4d7b2a717be736 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 3 Oct 2024 11:14:38 +0100 Subject: [PATCH 16/36] Document species selection process. --- prepare-species/readme.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 prepare-species/readme.md diff --git a/prepare-species/readme.md b/prepare-species/readme.md new file mode 100644 index 0000000..4fb3560 --- /dev/null +++ b/prepare-species/readme.md @@ -0,0 +1,25 @@ +# Species selection for LIFE + +LIFE is currently based on the following species selection criteria from the IUCN Redlist list of endangered species. In general we align with the guidelines set out in [Recent developments in the production of Area of Habitat (AOH) maps for terrestrial vertebrates.]() by Busana et al. + +* We select from the classes AMPHIBIA, AVES, MAMMALIA, and REPTILIA. 
+* We exclude species that are categorised as:
+    * Extinct
+    * Not evaluated
+    * Data deficient
+* We select the most recent assessment for each species.
+* When selecting habitats we ignore those whose suitability is marked as "marginal"
+* For ranges:
+    * We select for origin as codes 1, 2 and 6 (Native, reintroduced, and assisted colonization)
+    * LIFE generates data under both current and historic scenarios, and so the selection process for ranges is different for each scenario:
+        * For current, we select under 1 and 2 (Extant and Probably Extant)
+        * For historic, we select under 1, 2, 4, and 5 (Extant, Probably Extant, Possibly Extinct, and Extinct)
+    * Seasonality is selected from the categories Resident, Breeding, and non-Breeding. These are then combined in the following way:
+        * For species with only a resident range, we treat them as resident only.
+        * For species that are migratory (having either a breeding or non-breeding range), we generate both a breeding and non-breeding range, where each is the union of the respective migratory range (if present) and the resident range (if present).
+* For metadata, we do the following hygiene steps:
+    * If elevation lower is missing, or less than the expected minimum, we set it to that minimum: -500m
+    * If elevation upper is missing, or over the expected maximum, we set it to that maximum: 9000m
+    * If the elevation lower is greater than the upper, we swap the two values
+    * If the difference between the two values is less than 50m then each value is equally adjusted out from centre to ensure that they are 50m apart.
+

From 476236640e8c6c7089d31507f8b84a98c5c31aeb Mon Sep 17 00:00:00 2001
From: Michael Dales
Date: Wed, 16 Oct 2024 08:12:59 +0100
Subject: [PATCH 17/36] First full pass of LIFE

---
 aoh-calculator                               |   2 +-
 deltap/delta_p_scaled_area.py                |  11 +-
 deltap/global_code_residents_pixel_AE_128.py |  28 +--
 method.md                                    |  78 +++++----
 prepare-species/extract_species_psql.py      |  28 +--
 utils/persistencegenerator.py                |  63 +++++++
 utils/raster_sum.py                          |   3 +-
 utils/speciesgenerator.py                    | 173 +++++--------------
 8 files changed, 187 insertions(+), 199 deletions(-)
 create mode 100644 utils/persistencegenerator.py

diff --git a/aoh-calculator b/aoh-calculator
index 614d65a..412433a 160000
--- a/aoh-calculator
+++ b/aoh-calculator
@@ -1 +1 @@
-Subproject commit 614d65a586815ca9021894287f217723d6102b8f
+Subproject commit 412433a917fb32160f9cd3989ad9efc2ae8ebea0
diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py
index ed5e1ad..93e9213 100644
--- a/deltap/delta_p_scaled_area.py
+++ b/deltap/delta_p_scaled_area.py
@@ -12,6 +12,9 @@ def delta_p_scaled_area(
     diff_area_map_path: str,
     output_path: str,
 ):
+    dirname, basename = os.path.split(output_path)
+    os.makedirs(dirname, exist_ok=True)
+
     per_taxa = [
         RasterLayer.layer_from_file(os.path.join(input_path, x))
         for x in sorted(glob("*.tif", root_dir=input_path))
@@ -23,16 +26,13 @@ def delta_p_scaled_area(
 
     area_restore_filter = area_restore.numpy_apply(lambda c: np.where(c < SCALE, float('nan'), c)) / SCALE
 
-    dirname, basename = os.path.split(output_path)
-
     per_taxa_path = os.path.join(dirname, f"per_taxa_{basename}")
     with RasterLayer.empty_raster_layer_like(area_restore, filename=per_taxa_path, nodata=float('nan'), bands=len(per_taxa)) as result:
         for idx in range(len(per_taxa)):
             inlayer = per_taxa[idx]
             _, name = os.path.split(inlayer.name)
             result._dataset.GetRasterBand(idx+1).SetDescription(name[:-4])
-            filtered_layer = inlayer.numpy_apply(lambda il, af: np.where(af != 0, il, float('nan')), 
area_restore_filter) - scaled_filtered_layer = (filtered_layer / area_restore_filter) * -1.0 + scaled_filtered_layer = inlayer.numpy_apply(lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), area_restore_filter) scaled_filtered_layer.parallel_save(result, band=idx + 1) summed_output_path = os.path.join(dirname, f"summed_{basename}") @@ -40,8 +40,7 @@ def delta_p_scaled_area( summed_layer = per_taxa[0] for layer in per_taxa[1:]: summed_layer = summed_layer + layer - filtered_layer = summed_layer.numpy_apply(lambda il, af: np.where(af != 0, il, float('nan')), area_restore_filter) - scaled_filtered_layer = (filtered_layer / area_restore_filter) * -1.0 + scaled_filtered_layer = summed_layer.numpy_apply(lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), area_restore_filter) scaled_filtered_layer.parallel_save(result) def main() -> None: diff --git a/deltap/global_code_residents_pixel_AE_128.py b/deltap/global_code_residents_pixel_AE_128.py index 1da4435..567c0dd 100644 --- a/deltap/global_code_residents_pixel_AE_128.py +++ b/deltap/global_code_residents_pixel_AE_128.py @@ -2,7 +2,7 @@ import math import os import sys -import types +from enum import Enum import geopandas as gpd import numpy as np @@ -13,10 +13,10 @@ GOMPERTZ_B = -14.5 GOMPERTZ_ALPHA = 1 -seasons = types.SimpleNamespace() -seasons.RESIDENT = 1 -seasons.BREEDING = 2 -seasons.NONBREEDING = 3 +class Season(Enum): + RESIDENT = 1 + BREEDING = 2 + NONBREEDING = 3 def gen_gompertz(x: float) -> float: return math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) @@ -70,7 +70,7 @@ def global_code_residents_pixel_ae( except: # pylint:disable=W0702 sys.exit(f"Failed to read {species_data_path}") taxid = filtered_species_info.id_no.values[0] - season = int(filtered_species_info.seasonal.values[0]) + season = Season[filtered_species_info.season.values[0]] try: exp_val = float(exponent) @@ -84,8 +84,8 @@ def global_code_residents_pixel_ae( sys.exit(f"unrecognised exponent {exponent}") match season: - case 1: #seasons.RESIDENT: - filename = f"{taxid}_{season}.tif" + case Season.RESIDENT: + filename = f"{taxid}_{season.name}.tif" try: current = open_layer_as_float64(os.path.join(current_aohs_path, filename)) except FileNotFoundError: @@ -106,6 +106,8 @@ def global_code_residents_pixel_ae( print(f"Historic AoH for {taxid} is zero, aborting") sys.exit() + print(f"current: {current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_AOH.sum()}") + layers = [current, scenario] union = RasterLayer.find_union(layers) for layer in layers: @@ -117,15 +119,17 @@ def global_code_residents_pixel_ae( current_AOH = current.sum() new_p_layer = process_delta_p(current, scenario, current_AOH, historic_AOH, z_exponent_func_raster) + print(new_p_layer.sum()) old_persistence = calc_persistence_value(current_AOH, historic_AOH, z_exponent_func_float) + print(old_persistence) calc = new_p_layer - ConstantLayer(old_persistence) delta_p = RasterLayer.empty_raster_layer_like(new_p_layer, filename=os.path.join(output_folder, filename)) calc.save(delta_p) - case 3: #seasons.NONBREEDING: - nonbreeding_filename = f"{taxid}_{seasons.NONBREEDING}.tif" - breeding_filename = f"{taxid}_{seasons.BREEDING}.tif" + case Season.NONBREEDING: + nonbreeding_filename = f"{taxid}_{Season.NONBREEDING.name}.tif" + breeding_filename = f"{taxid}_{Season.BREEDING.name}.tif" try: historic_AOH_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)).sum() @@ -199,7 +203,7 @@ def global_code_residents_pixel_ae( 
output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=os.path.join(output_folder, nonbreeding_filename)) delta_p_layer.save(output) - case 2: #seasons.BREEDING: + case Season.BREEDING: pass # covered by the nonbreeding case case _: sys.exit(f"Unexpected season for species {taxid}: {season}") diff --git a/method.md b/method.md index 271d782..c9316bb 100644 --- a/method.md +++ b/method.md @@ -1,5 +1,10 @@ --- -path: /roo +path: /root +TAXA: +- AMPHIBIA +- AVES +CURVE: +- "0.25" --- # How to run the pipeline for LIFE @@ -59,7 +64,7 @@ For querying the IUCN data held in the PostGIS database we use a seperate contai ```shark-build:postgis ((from python:3.12-slim) (run (network host) (shell "apt-get update -qqy && apt-get -y install libpq-dev gcc git && rm -rf /var/lib/apt/lists/* && rm -rf /var/cache/apt/*")) - (run (network host) (shell "pip install psycopg2 SQLalchemy geopandas")) + (run (network host) (shell "pip install psycopg2 postgis geopandas")) (run (network host) (shell "pip install git+https://github.com/quantifyearth/pyshark")) (copy (src "./prepare-species") (dst "/root/")) (workdir "/root/") @@ -186,7 +191,6 @@ python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output /da ``` ### Differences maps - In the algorithm we use need to account for map projection distortions, so all values in the AoHs are based on the area per pixel. To get the final extinction risk values we must remove that scaling. To do that we generate a map of area difference from current for the given scenario. ```shark-run:layer-prep @@ -234,7 +238,7 @@ export DB_USER=username export DB_PASSWORD=secretpassword export DB_NAME=iucnredlist -python3 ./prepare-species/extract_species_psql.py --output /data/species-info/ --projection "EPSG:4326" +python3 ./prepare-species/extract_species_psql.py --class %{TAXA} --output /data/species-info/%{TAXA}/ --projection "EPSG:4326" ``` The reason for doing this primarly one of pipeline optimisation, though it also makes the tasks of debugging and provenance tracing much easier. Most build systems, including the one we use, let you notice when files have updated and only do the work required based on that update. If we have many thousands of species on the redlise and only a few update, if we base our calculation on a single file with all species in, we'll have to calculate all thousands of results. But with this step added in, we will re-generate the per species per season GeoJSON files, which is cheap, but then we can spot that most of them haven't changed and we don't need to then calculate the rasters for those ones in the next stage. 
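The delta-p commands below rest on a species-area style persistence curve: a species' persistence is a function of the fraction of its historic AoH that remains, raised to the exponent z (the `%{CURVE}` value, 0.25 here). For the plain power-law case the per-species calculation reduces to the following sketch (`global_code_residents_pixel_AE_128.py` additionally supports a Gompertz curve and handles breeding and non-breeding seasons separately for migratory species):

```python
def persistence(aoh: float, historic_aoh: float, z: float = 0.25) -> float:
    # Fraction of historic AoH remaining, raised to the curve exponent z.
    return (aoh / historic_aoh) ** z

def delta_p(current_aoh: float, scenario_aoh: float,
            historic_aoh: float, z: float = 0.25) -> float:
    # Change in persistence if the scenario landscape replaced the current
    # one; summing this per pixel over all species gives the deltap layers.
    return (persistence(scenario_aoh, historic_aoh, z)
            - persistence(current_aoh, historic_aoh, z))
```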
@@ -249,32 +253,32 @@ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/current/ \ --elevation-min /data/elevation-min-1k.tif \ --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ - --speciesdata /data/species-info/current/* \ - --output /data/aohs/current/ + --speciesdata /data/species-info/%{TAXA}/current/* \ + --output /data/aohs/current/%{TAXA}/ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/restore/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ - --speciesdata /data/species-info/current/* \ - --output /data/aohs/restore/ + --speciesdata /data/species-info/%{TAXA}/current/* \ + --output /data/aohs/restore/%{TAXA}/ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/arable/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ - --speciesdata /data/species-info/current/* \ - --output /data/aohs/arable/ + --speciesdata /data/species-info/%{TAXA}/current/* \ + --output /data/aohs/arable/%{TAXA}/ python3 ./aoh-calculator/aohcalc.py --habitats /data/habitat_maps/pnv/ \ --elevation-max /data/elevation-max-1k.tif \ --elevation-min /data/elevation-min-1k.tif \ --area /data/area-per-pixel.tif \ --crosswalk /data/crosswalk.csv \ - --speciesdata /data/species-info/historic/* \ - --output /data/aohs/pnv/ + --speciesdata /data/species-info/%{TAXA}/historic/* \ + --output /data/aohs/pnv/%{TAXA}/ ``` The results you then want will all be in: @@ -309,23 +313,23 @@ For each species we use the AoH data to calculate the likelihood of extinction u ```shark-run:deltap -python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \ - --current_path /data/aohs/current/ \ - --scenario_path /data/aohs/restore/ \ - --historic_path /data/aohs/pnv/ \ - --z 0.25 \ - --output_path /data/deltap/restore/ - -python3 ./utils/raster_sum.py --rasters_directory /data/deltap/restore/ --output /data/deltap/restore_0.25.tif - -python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/current/* \ - --current_path /data/aohs/current/ \ - --scenario_path /data/aohs/arable/ \ - --historic_path /data/aohs/pnv/ \ - --z 0.25 \ - --output_path /data/deltap/arable/ - -python3 ./utils/raster_sum.py --rasters_directory /data/deltap/arable/ --output /data/deltap/arable_0.25.tif +python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/%{TAXA}/current/* \ + --current_path /data/aohs/current/%{TAXA}/ \ + --scenario_path /data/aohs/restore/%{TAXA}/ \ + --historic_path /data/aohs/pnv/%{TAXA}/ \ + --z %{CURVE} \ + --output_path /data/deltap/restore/%{CURVE}/%{TAXA}/ + +python3 ./utils/raster_sum.py --rasters_directory /data/deltap/restore/%{CURVE}/%{TAXA}/ --output /data/deltap_sum/restore/%{CURVE}/%{TAXA}.tif + +python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/%{TAXA}/current/* \ + --current_path /data/aohs/current/%{TAXA}/ \ + --scenario_path /data/aohs/arable/%{TAXA}/ \ + --historic_path /data/aohs/pnv/%{TAXA}/ \ + --z %{CURVE} \ + --output_path /data/deltap/arable/%{CURVE}/%{TAXA}/ + +python3 ./utils/raster_sum.py --rasters_directory /data/deltap/arable/%{CURVE}/%{TAXA}/ --output /data/deltap_sum/arable/%{CURVE}/%{TAXA}.tif ``` ```shark-publish2 @@ -336,16 +340,16 @@ python3 ./utils/raster_sum.py --rasters_directory 
/data/deltap/arable/ --output Finally, we need to scale the results for publication: ```shark-run:deltap -python3 ./deltap/delta_p_hectare.py --input /data/deltap/restore_0.25.tif \ - --diffmap /data/habitat/restore_diff_area.tif \ - --output /data/deltap/scaled_restore_0.25.tif +python3 ./deltap/delta_p_scaled_area.py --input /data/deltap_sum/restore/%{CURVE}/ \ + --diffmap /data/habitat/restore_diff_area.tif \ + --output /data/deltap_final/scaled_restore_%{CURVE}.tif -python3 ./deltap/delta_p_hectare.py --input /data/deltap/arable_0.25.tif \ - --diffmap /data/habitat/arable_diff_area.tif \ - --output /data/deltap/scaled_arable_0.25.tif +python3 ./deltap/delta_p_scaled_area.py --input /data/deltap_sum/arable/%{CURVE}/ \ + --diffmap /data/habitat/arable_diff_area.tif \ + --output /data/deltap_final/scaled_arable_%{CURVE}.tif ``` ```shark-publish -/data/deltap/scaled_restore_0.25.tif -/data/deltap/scaled_arable_0.25.tif +/data/deltap_final/scaled_restore_%{CURVE}.tif +/data/deltap_final/scaled_arable_%{CURVE}.tif ``` diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py index 5b6bc74..75e8be9 100644 --- a/prepare-species/extract_species_psql.py +++ b/prepare-species/extract_species_psql.py @@ -36,7 +36,7 @@ WHERE assessments.latest = true AND taxons.class_name = %s - AND red_list_category_lookup.code NOT IN ('DD', 'NE', 'EX') + AND red_list_category_lookup.code NOT IN ('EX') """ HABITATS_STATEMENT = """ @@ -200,6 +200,7 @@ def process_row( def extract_data_per_species( + classname: str, output_directory_path: str, target_projection: Optional[str], ) -> None: @@ -208,20 +209,26 @@ def extract_data_per_species( cursor = connection.cursor() for era, presence in [("current", (1, 2)), ("historic", (1, 2, 4, 5))]: - for classname in ['AMPHIBIA', 'AVES', 'MAMMALIA', 'REPTILIA']: - era_output_directory_path = os.path.join(output_directory_path, era, classname) + era_output_directory_path = os.path.join(output_directory_path, era) - cursor.execute(MAIN_STATEMENT, (classname,)) - # This can be quite big (tens of thousands), but in modern computer term is quite small - # and I need to make a follow on DB query per result. - results = cursor.fetchall() + cursor.execute(MAIN_STATEMENT, (classname,)) + # This can be quite big (tens of thousands), but in modern computer term is quite small + # and I need to make a follow on DB query per result. 
+ results = cursor.fetchall() - # The limiting amount here is how many concurrent connections the database can take - with Pool(processes=20) as pool: - pool.map(partial(process_row, era_output_directory_path, presence), results) + # The limiting amount here is how many concurrent connections the database can take + with Pool(processes=20) as pool: + pool.map(partial(process_row, era_output_directory_path, presence), results) def main() -> None: parser = argparse.ArgumentParser(description="Process agregate species data to per-species-file.") + parser.add_argument( + '--class', + type=str, + help="Species class name", + required=True, + dest="classname", + ) parser.add_argument( '--output', type=str, @@ -240,6 +247,7 @@ def main() -> None: args = parser.parse_args() extract_data_per_species( + args.classname, args.output_directory_path, args.target_projection ) diff --git a/utils/persistencegenerator.py b/utils/persistencegenerator.py new file mode 100644 index 0000000..a22ccd2 --- /dev/null +++ b/utils/persistencegenerator.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +import argparse +import os +from typing import List, Set + +import pandas as pd + +def species_generator( + input_dir: str, + output_csv_path: str +): + taxas = os.listdir(input_dir) + + res = [] + for taxa in taxas: + taxa_path = os.path.join(input_dir, taxa, 'current') + speciess = os.listdir(taxa_path) + for scenario in ['arable', 'restore']: + for species in speciess: + res.append([ + os.path.join('/home/mwd24/lifetest/species-info/', taxa, 'current', species), + os.path.join('/home/mwd24/lifetest/aohs/', 'current', taxa), + os.path.join('/home/mwd24/lifetest/aohs/', scenario, taxa), + os.path.join('/home/mwd24/lifetest/aohs/', 'pnv', taxa), + '0.25', + os.path.join('/home/mwd24/lifetest/deltap/', scenario, '0.25', taxa), + ]) + + + df = pd.DataFrame(res, columns=[ + '--speciesdata', + '--current_path', + '--scenario_path', + '--historic_path', + '--z', + '--output_path', + ]) + df.to_csv(output_csv_path, index=False) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Species and seasonality generator.") + parser.add_argument( + '--input', + type=str, + help="directory with taxa folders of species info", + required=True, + dest="input_dir" + ) + parser.add_argument( + '--output', + type=str, + help="name of output file for csv", + required=False, + dest="output" + ) + args = parser.parse_args() + + species_generator(args.input_dir, args.output) + +if __name__ == "__main__": + main() diff --git a/utils/raster_sum.py b/utils/raster_sum.py index ec43c7a..9a85f3d 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -50,6 +50,8 @@ def build_k( output_filename: str, processes_count: int ) -> None: + result_dir, filename = os.path.split(output_filename) + os.makedirs(result_dir, exist_ok=True) files = [os.path.join(images_dir, x) for x in glob.glob("*.tif", root_dir=images_dir)] if not files: @@ -86,7 +88,6 @@ def build_k( time.sleep(1) # here we should have now a set of images in tempdir to merge - result_dir, filename = os.path.split(output_filename) single_worker = Process(target=worker, args=( filename, result_dir, diff --git a/utils/speciesgenerator.py b/utils/speciesgenerator.py index e153507..7c0f092 100644 --- a/utils/speciesgenerator.py +++ b/utils/speciesgenerator.py @@ -1,84 +1,55 @@ #!/usr/bin/env python3 import argparse -import contextlib -import json -import sys -from typing import List, Set, Tuple +import os +from typing import List, Set -import geopandas as gpd import pandas as pd 
-#from aoh.apps.lib import seasonality -import seasonality -from iucn_modlib.classes.Taxon import Taxon -from iucn_modlib.factories import TaxonFactories +def species_generator( + input_dir: str, + output_csv_path: str +): + taxas = os.listdir(input_dir) + + res = [] + for taxa in taxas: + for scenario in ['current', 'restore', 'arable', 'pnv']: + source = 'historic' if scenario == 'pnv' else 'current' + taxa_path = os.path.join(input_dir, taxa, source) + speciess = os.listdir(taxa_path) + for species in speciess: + res.append([ + os.path.join('/home/mwd24/lifetest/habitat_maps', scenario), + '/home/mwd24/lifetest/elevation-max-1k.tif', + '/home/mwd24/lifetest/elevation-min-1k.tif', + '/home/mwd24/lifetest/area-per-pixel.tif', + '/home/mwd24/lifetest/crosswalk.csv', + os.path.join('/home/mwd24/lifetest/species-info/', taxa, source, species), + os.path.join('/home/mwd24/lifetest/aohs/', scenario, taxa) + ]) + + + df = pd.DataFrame(res, columns=[ + '--habitats', + '--elevation-max', + '--elevation-min', + '--area', + '--crosswalk', + '--speciesdata', + '--output' + ]) + df.to_csv(output_csv_path, index=False) -@contextlib.contextmanager -def file_writer(file_name = None): - writer = open(file_name, "w", encoding="utf-8") if file_name is not None else sys.stdout - yield writer - if file_name: - writer.close() - -def project_species_list(project: str, ranges: str) -> List[Tuple[int, str]]: - ''' Returns a list of species that have ranges that intersect with project polygon - To Check: does it give the correct answers? , What do we want the output to be like? - (id_nos are float but should maybe be int) - - Parameters: - Project: the file address of a project polygon - Ranges: the file address of the species' range polygons - - Output: dictionary of id_no and binomial for species that are present - ''' - # IMPORT PROJECT POLYGON - project_polygon = gpd.read_file(project) - # IMPORT SPECIES RANGES FILTERED BY WHETHER THEY INTERSECT WITH THE PROJECT POLYGON - ranges_gdf = gpd.read_file(ranges, mask=project_polygon) - # CONVERT TO DATAFRAME - # Note: Not sure if all of these steps are necessary - ranges_df = pd.DataFrame(ranges_gdf) # I think stops it being a spatial database? 
- # EXTRACT A LIST OF UNIQUE ID_NO and UNIQUE BIOMIALS - id_list = [int(x) for x in ranges_df['id_no'].unique().tolist()] - binomial_list = ranges_df['binomial'].unique().tolist() - return zip(id_list, binomial_list) - -def seasonality_for_species(species: Taxon, range_file: str) -> Set[str]: - og_seasons = set( - seasonality.habitatSeasonality(species) + - seasonality.rangeSeasonality(range_file, species.taxonid) - ) - if len(og_seasons) == 0: - return {} - seasons = {'resident'} - if len(og_seasons.difference({'resident'})) > 0: - seasons = {'breeding', 'nonbreeding'} - return seasons def main() -> None: parser = argparse.ArgumentParser(description="Species and seasonality generator.") parser.add_argument( - '--experiment', - type=str, - help="name of experiment group from configuration json", - required=True, - dest="experiment" - ) - parser.add_argument( - '--config', - type=str, - help="path of configuration json", - required=False, - dest="config_path", - default="config.json" - ) - parser.add_argument( - '--project', + '--input', type=str, - help="name of project file geojson", + help="directory with taxa folders of species info", required=True, - dest="project" + dest="input_dir" ) parser.add_argument( '--output', @@ -87,71 +58,9 @@ def main() -> None: required=False, dest="output" ) - parser.add_argument( - '--epochs', - type=str, - help="comma seperated (but no spaces!) list of experiments to run for", - required=True, - dest="epochs" - ) - args = vars(parser.parse_args()) - - try: - with open(args['config_path'], 'r', encoding='utf-8') as config_file: - config = json.load(config_file) - except FileNotFoundError: - print(f'Failed to find configuration json file {args["config_path"]}') - sys.exit(-1) - except json.decoder.JSONDecodeError as exc: - print(f'Failed to parse {args["config_path"]} at line {exc.lineno}, column {exc.colno}: {exc.msg}') - sys.exit(-1) - - try: - experiment = config['experiments'][args['experiment']] - except KeyError: - if not 'experiments' in config: - print("No experiments section founnd in configuration json") - else: - print(f'Failed to find experiment with name {args["experiment"]}. Options found:') - for experiment in config['experiments']: - print(f'\t{experiment}') - sys.exit(-1) - - epoch_list = args['epochs'].split(',') - - try: - range_path = experiment['range'] - except KeyError: - print(f'Experiment "{args["experiment"]}" was missing range key.') - - batch = None - if 'iucn_batch' in experiment: - batch = TaxonFactories.loadBatchSource(experiment['iucn_batch']) - - # Work part 1: get the species list - species_list = project_species_list(args["project"], range_path) - - with file_writer(args["output"]) as output: - output.write('--taxid,--seasonality,--experiment\n') - for species_id, _ in species_list: - if batch: - # try: - species = TaxonFactories.TaxonFactoryRedListBatch(species_id, batch) - # except IndexError as e: - # # Some of the data in the batch needs tidy... 
-            #     print(f"Oh no {e}")
-            #     continue
-            else:
-                try:
-                    species = TaxonFactories.TaxonFactoryRedListAPI(species_id, config['iucn']['api_key'])
-                except KeyError:
-                    print("Failed to find IUCN API key in config file or batch path in experiment.")
-                    sys.exit(-1)
+    args = parser.parse_args()
 
-            seasonality_list = seasonality_for_species(species, range_path)
-            for season in seasonality_list:
-                for epoch in epoch_list:
-                    output.write(f'{species_id},{season},{epoch}\n')
+    species_generator(args.input_dir, args.output)
 
 if __name__ == "__main__":
     main()

From 45ec9e1c4255642244ea3229c0092b2670be1157 Mon Sep 17 00:00:00 2001
From: Michael Dales
Date: Mon, 21 Oct 2024 18:21:41 +0100
Subject: [PATCH 18/36] Fixes and tweaks

---
 aoh-calculator                               |   2 +-
 deltap/delta_p_scaled_area.py                |   5 +-
 deltap/global_code_residents_pixel_AE_128.py |  16 ++-
 prepare-layers/make_arable_map.py            |   2 +-
 prepare-species/cleaning.py                  |  28 +++-
 prepare-species/extract_species_psql.py      | 141 +++++++++++++------
 6 files changed, 133 insertions(+), 61 deletions(-)

diff --git a/aoh-calculator b/aoh-calculator
index 412433a..487c8ef 160000
--- a/aoh-calculator
+++ b/aoh-calculator
@@ -1 +1 @@
-Subproject commit 412433a917fb32160f9cd3989ad9efc2ae8ebea0
+Subproject commit 487c8ef0bb16d0c632bfd6412b386376fac0abd9
diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py
index 93e9213..2e2d8d7 100644
--- a/deltap/delta_p_scaled_area.py
+++ b/deltap/delta_p_scaled_area.py
@@ -22,7 +22,10 @@ def delta_p_scaled_area(
     area_restore = RasterLayer.layer_from_file(diff_area_map_path)
 
     for layer in per_taxa:
-        layer.set_window_for_union(area_restore.area)
+        try:
+            layer.set_window_for_union(area_restore.area)
+        except ValueError:
+            layer.set_window_for_intersection(area_restore.area)
 
     area_restore_filter = area_restore.numpy_apply(lambda c: np.where(c < SCALE, float('nan'), c)) / SCALE
 
diff --git a/deltap/global_code_residents_pixel_AE_128.py b/deltap/global_code_residents_pixel_AE_128.py
index 567c0dd..24cecdc 100644
--- a/deltap/global_code_residents_pixel_AE_128.py
+++ b/deltap/global_code_residents_pixel_AE_128.py
@@ -91,11 +91,13 @@ def global_code_residents_pixel_ae(
     except FileNotFoundError:
         print(f"Failed to open current layer {os.path.join(current_aohs_path, filename)}")
         sys.exit()
+
     try:
         scenario = open_layer_as_float64(os.path.join(scenario_aohs_path, filename))
     except FileNotFoundError:
-        print(f"Failed to open scenario layer {os.path.join(scenario_aohs_path, filename)}")
-        sys.exit()
+        # If there is a current but no scenario file it's because the species went extinct under the scenario
+        scenario = ConstantLayer(0.0)
+
     try:
         historic_AOH = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum()
     except FileNotFoundError as fnf:
@@ -106,7 +108,7 @@ def global_code_residents_pixel_ae(
         print(f"Historic AoH for {taxid} is zero, aborting")
         sys.exit()
 
-    print(f"current: {current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_AOH.sum()}")
+    # print(f"current: {current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_AOH.sum()}")
 
     layers = [current, scenario]
     union = RasterLayer.find_union(layers)
@@ -169,13 +171,13 @@ def global_code_residents_pixel_ae(
     try:
         scenario_breeding = open_layer_as_float64(breeding_scenario_path)
     except FileNotFoundError as fnf:
-        print(f"Failed to open scenario breeding {breeding_scenario_path}")
-        sys.exit()
+        # If there is a current but no scenario file it's because the species went extinct under the scenario
+        scenario_breeding = ConstantLayer(0.0)
 
     try:
scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path)
     except FileNotFoundError as fnf:
-        print(f"Failed to open sceario non breeding{fnf.filename}")
-        sys.exit()
+        # If there is a current but no scenario file it's because the species went extinct under the scenario
+        scenario_non_breeding = ConstantLayer(0.0)
 
     layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding]
     union = RasterLayer.find_union(layers)
diff --git a/prepare-layers/make_arable_map.py b/prepare-layers/make_arable_map.py
index e90483a..aaa3f9d 100644
--- a/prepare-layers/make_arable_map.py
+++ b/prepare-layers/make_arable_map.py
@@ -19,7 +19,7 @@
     "4", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7",
     "6",
     "8", "8.1", "8.2", "8.3",
-    "14.1", "14.2", "14.3", "14.4", "14.6", # urban removed
+    "14.1", "14.2", "14.3", "14.4", "14.6", # urban (14.5) removed
     #"16", # Not in crosswalk due to iucn_modlib
     "17",
     #"18", # Not in crosswalk due to iucn_modlib
diff --git a/prepare-species/cleaning.py b/prepare-species/cleaning.py
index 0d1d8fb..718a92c 100644
--- a/prepare-species/cleaning.py
+++ b/prepare-species/cleaning.py
@@ -8,21 +8,26 @@
 def tidy_data(row: pd.Series) -> pd.Series:
     """Tidy up the data as per Busana et al"""
 
+    # Lower elevation higher than upper elevation
+    if not pd.isna(row.elevation_lower) and not pd.isna(row.elevation_upper):
+        if row.elevation_lower > row.elevation_upper:
+            row.elevation_lower = ELEVATION_MIN
+            row.elevation_upper = ELEVATION_MAX
+
     # Missing lower and/or upper elevation
-    if row.elevation_lower is None:
+    if pd.isna(row.elevation_lower):
         row.elevation_lower = ELEVATION_MIN
+        if not pd.isna(row.elevation_upper) and row.elevation_upper < ELEVATION_MIN:
+            row.elevation_upper = ELEVATION_MAX
-    if row.elevation_upper is None:
+    if pd.isna(row.elevation_upper):
         row.elevation_upper = ELEVATION_MAX
+        if row.elevation_lower > ELEVATION_MAX:
+            row.elevation_lower = ELEVATION_MIN
 
     # Lower elevation < -500 and/or upper elevation > 9000
     row.elevation_lower = max(ELEVATION_MIN, row.elevation_lower)
     row.elevation_upper = min(ELEVATION_MAX, row.elevation_upper)
 
-    # Lower elevation higher than upper elevation
-    if row.elevation_lower > row.elevation_upper:
-        row.elevation_lower = ELEVATION_MIN
-        row.elevation_upper = ELEVATION_MAX
-
     # Small difference (<50m) between lower and upper elevation
     elevation_diff = row.elevation_upper - row.elevation_lower
     if elevation_diff < 50:
@@ -31,4 +36,13 @@
         row.elevation_lower -= adjust
         row.elevation_upper += adjust
 
+    if row.elevation_lower < ELEVATION_MIN:
+        adjust = ELEVATION_MIN - row.elevation_lower
+        row.elevation_lower += adjust
+        row.elevation_upper += adjust
+    elif row.elevation_upper > ELEVATION_MAX:
+        adjust = row.elevation_upper - ELEVATION_MAX
+        row.elevation_lower -= adjust
+        row.elevation_upper -= adjust
+
     return row
diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py
index 75e8be9..fce0bbb 100644
--- a/prepare-species/extract_species_psql.py
+++ b/prepare-species/extract_species_psql.py
@@ -15,6 +15,8 @@
 from cleaning import tidy_data
 
 logger = logging.getLogger(__name__)
+logging.basicConfig()
+logger.setLevel(logging.DEBUG)
 
 SEASON_NAME = {
     1: "RESIDENT",
@@ -41,6 +43,7 @@
 
 HABITATS_STATEMENT = """
 SELECT
+    assessment_habitats.supplementary_fields->>'season',
     STRING_AGG(habitat_lookup.code, '|') AS full_habitat_code,
     STRING_AGG(system_lookup.description->>'en', '|') AS systems
 FROM
@@ -56,6 +59,7 @@
assessment_habitats.supplementary_fields->>'suitability' IS NULL OR assessment_habitats.supplementary_fields->>'suitability' IN ('Suitable', 'Unknown') ) +GROUP BY (assessment_habitats.supplementary_fields->>'season') """ GEOMETRY_STATEMENT = """ @@ -112,86 +116,133 @@ def process_row( id_no, assessment_id, elevation_lower, elevation_upper = row cursor.execute(HABITATS_STATEMENT, (assessment_id,)) - habitats = cursor.fetchall() + raw_habitats = cursor.fetchall() - if len(habitats) == 0: - # No matching habitats + if len(raw_habitats) == 0: + logger.debug("Dropping %s as no habitats found", id_no) return - elif len(habitats) > 1: - raise ValueError("expected just one habitat value") # Clean up habitats to ensure they're unique (the system agg in the SQL statement might duplicate them) - raw_habitats, systems = habitats[0] + # In the database there are the following seasons: + # breeding + # Breeding Season + # non-breeding + # Non-Breeding Season + # passage + # Passage + # resident + # Resident + # Seasonal Occurrence Unknown + # unknown + # null + + habitats = {} + for season, habitat_values, systems in raw_habitats: + + if season in ['passage', 'Passage']: + continue + elif season in ['resident', 'Resident', 'Seasonal Occurrence Unknown', 'unknown', None]: + season_code = 1 + elif season in ['breeding', 'Breeding Season']: + season_code = 2 + elif season in ['non-breeding', 'Non-Breeding Season']: + season_code = 3 + else: + raise ValueError(f"Unexpected season {season} for {id_no}") - if systems is None: - logging.warning("Skipping %s: no systems in DB", id_no) - return - if "Marine" in systems: - logging.info("Skipping %s: marine in systems", id_no) - return + if systems is None: + logger.debug("Dropping %s: no systems in DB", id_no) + continue + if "Marine" in systems: + logger.debug("Dropping %s: marine in systems", id_no) + return + + if habitat_values is None: + logger.debug("Dropping %s: no habitats in DB", id_no) + continue + habitat_set = set([x for x in habitat_values.split('|')]) + if len(habitat_set) == 0: + logger.debug("Dropping %s: No habitats", id_no) + continue + if any([x.startswith('7') for x in habitat_set]): + logger.debug("Dropping %s: Habitat 7 in habitat list", id_no) + return + + try: + habitats[season_code] |= habitat_set + except KeyError: + habitats[season_code] = habitat_set - if raw_habitats is None: - logging.warning("Skipping %s: no habitats in DB", id_no) - return - habitats = list(set([x for x in raw_habitats.split('|')])) if len(habitats) == 0: - logging.info("Skipping %s: No habitats", id_no) return - if any([x.startswith('7') for x in habitats]): - logging.info("Skipping %s: Habitat 7 in habitat list", id_no) - return - - full_habitat_code = '|'.join(habitats) cursor.execute(GEOMETRY_STATEMENT, (assessment_id, presence)) geometries_data = cursor.fetchall() if len(geometries_data) == 0: - logging.info("Skipping %s: no habitats", id_no) + logger.info("Dropping %s: no habitats", id_no) return geometries = {} for season, geometry in geometries_data: geometries[season] = shapely.normalize(shapely.from_wkb(geometry.to_ewkb())) - seasons = list(geometries.keys()) - if seasons == [1]: + seasons = set(geometries.keys()) | set(habitats.keys()) + + if seasons == {1}: # Resident only gdf = gpd.GeoDataFrame( - [[id_no, SEASON_NAME[1], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code, geometries[1]]], + [[id_no, SEASON_NAME[1], int(elevation_lower) if elevation_lower else None, 
int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats[1])), geometries[1]]], columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], crs='epsg:4326' ) tidy_reproject_save(gdf, output_directory_path) else: # Breeding and non-breeding - if 1 in seasons and 2 in seasons: - season_2 = shapely.union(geometries[2], geometries[1]) - elif 2 in seasons: - season_2 = geometries[2] - elif 1 in seasons: - season_2 = geometries[1] - else: - logging.info("Skipping %s: no geometries for breeding", id_no) + # Sometimes in the IUCN database there's only data on one season (e.g., AVES 103838515), and so + # we need to do another sanity check to make sure both have useful data before we write out + + geometries_seasons_breeding = set(geometries.keys()) + geometries_seasons_breeding.discard(3) + geometries_breeding = [geometries[x] for x in geometries_seasons_breeding] + if len(geometries_breeding) == 0: + logger.debug("Dropping %s as no breeding geometries", id_no) return + geometry_breeding = shapely.union_all(geometries_breeding) - if 1 in seasons and 3 in seasons: - season_3 = shapely.union(geometries[3], geometries[1]) - elif 3 in seasons: - season_3 = geometries[3] - elif 1 in seasons: - season_3 = geometries[1] - else: - logging.info("Skipping %s: no geometries for non-breeding", id_no) + geometries_seasons_non_breeding = set(geometries.keys()) + geometries_seasons_non_breeding.discard(2) + geometries_non_breeding = [geometries[x] for x in geometries_seasons_non_breeding] + if len(geometries_non_breeding) == 0: + logger.debug("Dropping %s as no non-breeding geometries", id_no) + return + geometry_non_breeding = shapely.union_all(geometries_non_breeding) + + habitats_seasons_breeding = set(habitats.keys()) + habitats_seasons_breeding.discard(3) + habitats_breeding = set() + for season in habitats_seasons_breeding: + habitats_breeding |= habitats[season] + if len(habitats_breeding) == 0: + logger.debug("Dropping %s as no breeding habitats", id_no) + return + + habitats_seasons_non_breeding = set(habitats.keys()) + habitats_seasons_non_breeding.discard(2) + habitats_non_breeding = set() + for season in habitats_seasons_non_breeding: + habitats_non_breeding |= habitats[season] + if len(habitats_non_breeding) == 0: + logger.debug("Dropping %s as no non-breeding habitats", id_no) return gdf = gpd.GeoDataFrame( - [[id_no, SEASON_NAME[2], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code, season_2]], + [[id_no, SEASON_NAME[2], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats_breeding)), geometry_breeding]], columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], crs='epsg:4326' ) tidy_reproject_save(gdf, output_directory_path) gdf = gpd.GeoDataFrame( - [[id_no, SEASON_NAME[3], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, full_habitat_code, season_3]], + [[id_no, SEASON_NAME[3], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats_non_breeding)), geometry_non_breeding]], columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], crs='epsg:4326', ) @@ -216,6 +267,8 @@ def extract_data_per_species( # and I need to make a follow on DB query per result. 
results = cursor.fetchall()
 
+        logger.info("Found %d species in class %s in scenario %s", len(results), classname, era)
+
         # The limiting amount here is how many concurrent connections the database can take
         with Pool(processes=20) as pool:
             pool.map(partial(process_row, era_output_directory_path, presence), results)

From 6ab2194a258ff984da808265b0d954f3c1fed539 Mon Sep 17 00:00:00 2001
From: Michael Dales
Date: Tue, 22 Oct 2024 09:35:17 +0100
Subject: [PATCH 19/36] Add simple run script

---
 scripts/run.sh | 106 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100755 scripts/run.sh

diff --git a/scripts/run.sh b/scripts/run.sh
new file mode 100755
index 0000000..92a2263
--- /dev/null
+++ b/scripts/run.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+#
+# Assumes you've set up a python virtual environment in the current directory.
+#
+# In addition to the Python environment, you will need the following extra command line tools:
+#
+# https://github.com/quantifyearth/reclaimer - used to download inputs from Zenodo directly
+# https://github.com/quantifyearth/littlejohn - used to run batch jobs in parallel
+
+
+set -e
+
+# Get habitat layer and prepare for use
+reclaimer zenodo --zenodo_id 4058819 \
+                 --filename iucn_habitatclassification_composite_lvl2_ver004.zip \
+                 --extract \
+                 --output ${DATADIR}/habitat/jung_l2_raw.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/current_raw.tif \
+                                            --scale 0.016666666666667 \
+                                            --output ${DATADIR}/habitat_maps/current/
+
+# Get PNV layer and prepare for use
+reclaimer zenodo --zenodo_id 4038749 \
+                 --filename pnv_lvl1_004.zip \
+                 --extract \
+                 --output ${DATADIR}/habitat/pnv_raw.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/pnv_raw.tif \
+                                            --scale 0.016666666666667 \
+                                            --output ${DATADIR}/habitat_maps/pnv/
+
+# Generate an area scaling map
+python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output ${DATADIR}/habitat/area-per-pixel.tif
+
+# Generate the arable scenario map
+python3 ./prepare-layers/make_arable_map.py --current ${DATADIR}/habitat/current_raw.tif \
+                                            --crosswalk ${DATADIR}/crosswalk.csv \
+                                            --output ${DATADIR}/habitat/arable.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/arable.tif \
+                                            --scale 0.016666666666667 \
+                                            --output ${DATADIR}/habitat_maps/arable/
+
+python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \
+                                          --scenario ${DATADIR}/habitat/restore.tif \
+                                          --area ${DATADIR}/area-per-pixel.tif \
+                                          --scale 0.016666666666667 \
+                                          --output ${DATADIR}/habitat/restore_diff_area.tif
+
+# Generate the restore map
+python3 ./prepare-layers/make_restore_map.py --pnv ${DATADIR}/habitat/pnv_raw.tif \
+                                             --current ${DATADIR}/habitat/current_raw.tif \
+                                             --crosswalk ${DATADIR}/crosswalk.csv \
+                                             --output ${DATADIR}/habitat/restore.tif
+
+python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/restore.tif \
+                                            --scale 0.016666666666667 \
+                                            --output ${DATADIR}/habitat_maps/restore/
+
+python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \
+                                          --scenario ${DATADIR}/habitat/arable.tif \
+                                          --area ${DATADIR}/area-per-pixel.tif \
+                                          --scale 0.016666666666667 \
+                                          --output ${DATADIR}/habitat/arable_diff_area.tif
+
+# Fetch and prepare the elevation layers
+reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output ${DATADIR}/elevation.tif
+gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r max -co 
COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation.tif ${DATADIR}/elevation-max-1k.tif +gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation.tif ${DATADIR}/elevation-min-1k.tif + +# Get species data per taxa from IUCN data +python3 ./prepare-species/extract_species_psql.py --class AVES --output ${DATADIR}/species-info/AVES/ --projection "EPSG:4326" +python3 ./prepare-species/extract_species_psql.py --class AMPHIBIA --output ${DATADIR}/species-info/AMPHIBIA/ --projection "EPSG:4326" +python3 ./prepare-species/extract_species_psql.py --class MAMMALIA --output ${DATADIR}/species-info/MAMMALIA/ --projection "EPSG:4326" +python3 ./prepare-species/extract_species_psql.py --class REPTILIA --output ${DATADIR}/species-info/REPTILIA/ --projection "EPSG:4326" + +# Generate the batch job input CSVs +python3 ./utils/speciesgenerator.py --input ${DATADIR}/species-info --output ${DATADIR}/aohbatch.csv +python3 ./utils/persistencegenerator.py --input ${DATADIR}/species-info --output ${DATADIR}/persistencebatch.csv + +# Calculate all the AoHs +littlejohn -j 200 -c ${DATADIR}/arable_aohbatch.csv ${PWD}/venv/bin/python3 -- ./aoh-calculator/aohcalc.py + +# Calculate the per species Delta P values +littlejohn -j 150 -c ${DATADIR}/arable_persistencebatch.csv ${PWD}/venv/bin/python3 -- ./deltap/global_code_residents_pixel_AE_128.py + +# Per scenario per taxa sum the delta Ps +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/REPTILIA/ --output ${DATADIR}/deltap_sum/arable/0.25/REPTILIA.tif +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/AVES/ --output ${DATADIR}/deltap_sum/arable/0.25/AVES.tif +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/MAMMALIA/ --output ${DATADIR}/deltap_sum/arable/0.25/MAMMALIA.tif +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/AMPHIBIA/ --output ${DATADIR}/deltap_sum/arable/0.25/AMPHIBIA.tif + +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/MAMMALIA/ --output ${DATADIR}/deltap_sum/restore/0.25/MAMMALIA.tif +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/AMPHIBIA/ --output ${DATADIR}/deltap_sum/restore/0.25/AMPHIBIA.tif +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/REPTILIA/ --output ${DATADIR}/deltap_sum/restore/0.25/REPTILIA.tif +python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/AVES/ --output ${DATADIR}/deltap_sum/restore/0.25/AVES.tif + +# Generate final map +python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/restore/0.25/ \ + --diffmap ${DATADIR}/habitat/restore_diff_area.tif \ + --output ${DATADIR}/deltap_final/scaled_restore_0.25.tif + +python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/arable/0.25/ \ + --diffmap ${DATADIR}/habitat/arable_diff_area.tif \ + --output ${DATADIR}/deltap_final/scaled_arable_0.25.tif From 667119d435fdd3dea0ba56307f0100fa123187e9 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 28 Oct 2024 11:21:16 +0000 Subject: [PATCH 20/36] Update aoh-calculator --- aoh-calculator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aoh-calculator b/aoh-calculator index 487c8ef..de5db6f 160000 --- a/aoh-calculator +++ b/aoh-calculator @@ -1 +1 @@ -Subproject commit 487c8ef0bb16d0c632bfd6412b386376fac0abd9 +Subproject commit de5db6fbe4b4cbcebf4ae04df9817fac680b700c From 
a8fef7468c11e7e808e78e890c766a626856d000 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Mon, 28 Oct 2024 11:41:43 +0000 Subject: [PATCH 21/36] Fixes and tweaks --- prepare-species/extract_species_psql.py | 34 ++++++++++++++++++++----- scripts/run.sh | 8 +++--- utils/persistencegenerator.py | 22 +++++++++++----- utils/speciesgenerator.py | 26 ++++++++++++------- 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py index fce0bbb..fcf314a 100644 --- a/prepare-species/extract_species_psql.py +++ b/prepare-species/extract_species_psql.py @@ -24,6 +24,15 @@ 3: "NONBREEDING", } +COLUMNS = [ + "id_no", + "season", + "elevation_lower", + "elevation_upper", + "full_habitat_code", + "geometry" +] + MAIN_STATEMENT = """ SELECT assessments.sis_taxon_id as id_no, @@ -70,10 +79,11 @@ assessments LEFT JOIN assessment_ranges On assessment_ranges.assessment_id = assessments.id WHERE + -- LIFE doesn't use passage (season 4), and treats unknown (season 5) as resident. assessments.id = %s AND assessment_ranges.presence IN %s AND assessment_ranges.origin IN (1, 2, 6) - AND assessment_ranges.seasonal IN (1, 2, 3) + AND assessment_ranges.seasonal IN (1, 2, 3, 5) """ DB_HOST = os.getenv("DB_HOST") @@ -183,7 +193,20 @@ def process_row( return geometries = {} for season, geometry in geometries_data: - geometries[season] = shapely.normalize(shapely.from_wkb(geometry.to_ewkb())) + grange = shapely.normalize(shapely.from_wkb(geometry.to_ewkb())) + + match season: + case 1 | 5: + season_code = 1 + case 2 | 3: + season_code = season + case _: + raise ValueError(f"Unexpected season: {season}") + + try: + geometries[season_code] = shapely.union(geometries[season_code], grange) + except KeyError: + geometries[season_code] = grange seasons = set(geometries.keys()) | set(habitats.keys()) @@ -191,7 +214,7 @@ def process_row( # Resident only gdf = gpd.GeoDataFrame( [[id_no, SEASON_NAME[1], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats[1])), geometries[1]]], - columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], + columns=COLUMNS, crs='epsg:4326' ) tidy_reproject_save(gdf, output_directory_path) @@ -236,20 +259,19 @@ def process_row( gdf = gpd.GeoDataFrame( [[id_no, SEASON_NAME[2], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats_breeding)), geometry_breeding]], - columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], + columns=COLUMNS, crs='epsg:4326' ) tidy_reproject_save(gdf, output_directory_path) gdf = gpd.GeoDataFrame( [[id_no, SEASON_NAME[3], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats_non_breeding)), geometry_non_breeding]], - columns=["id_no", "season", "elevation_lower", "elevation_upper", "full_habitat_code", "geometry"], + columns=COLUMNS, crs='epsg:4326', ) tidy_reproject_save(gdf, output_directory_path) - def extract_data_per_species( classname: str, output_directory_path: str, diff --git a/scripts/run.sh b/scripts/run.sh index 92a2263..7fa7b99 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -76,14 +76,14 @@ python3 ./prepare-species/extract_species_psql.py --class MAMMALIA --output ${DA python3 ./prepare-species/extract_species_psql.py --class REPTILIA --output 
${DATADIR}/species-info/REPTILIA/ --projection "EPSG:4326" # Generate the batch job input CSVs -python3 ./utils/speciesgenerator.py --input ${DATADIR}/species-info --output ${DATADIR}/aohbatch.csv -python3 ./utils/persistencegenerator.py --input ${DATADIR}/species-info --output ${DATADIR}/persistencebatch.csv +python3 ./utils/speciesgenerator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/aohbatch.csv +python3 ./utils/persistencegenerator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/persistencebatch.csv # Calculate all the AoHs -littlejohn -j 200 -c ${DATADIR}/arable_aohbatch.csv ${PWD}/venv/bin/python3 -- ./aoh-calculator/aohcalc.py +littlejohn -j 200 -c ${DATADIR}/aohbatch.csv ${PWD}/venv/bin/python3 -- ./aoh-calculator/aohcalc.py --force-habitat # Calculate the per species Delta P values -littlejohn -j 150 -c ${DATADIR}/arable_persistencebatch.csv ${PWD}/venv/bin/python3 -- ./deltap/global_code_residents_pixel_AE_128.py +littlejohn -j 200 -c ${DATADIR}/persistencebatch.csv ${PWD}/venv/bin/python3 -- ./deltap/global_code_residents_pixel_AE_128.py # Per scenario per taxa sum the delta Ps python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/REPTILIA/ --output ${DATADIR}/deltap_sum/arable/0.25/REPTILIA.tif diff --git a/utils/persistencegenerator.py b/utils/persistencegenerator.py index a22ccd2..7a6cf34 100644 --- a/utils/persistencegenerator.py +++ b/utils/persistencegenerator.py @@ -8,6 +8,7 @@ def species_generator( input_dir: str, + data_dir: str, output_csv_path: str ): taxas = os.listdir(input_dir) @@ -19,12 +20,12 @@ def species_generator( for scenario in ['arable', 'restore']: for species in speciess: res.append([ - os.path.join('/home/mwd24/lifetest/species-info/', taxa, 'current', species), - os.path.join('/home/mwd24/lifetest/aohs/', 'current', taxa), - os.path.join('/home/mwd24/lifetest/aohs/', scenario, taxa), - os.path.join('/home/mwd24/lifetest/aohs/', 'pnv', taxa), + os.path.join(data_dir, 'species-info', taxa, 'current', species), + os.path.join(data_dir, 'aohs', 'current', taxa), + os.path.join(data_dir, 'aohs', scenario, taxa), + os.path.join(data_dir, 'aohs', 'pnv', taxa), '0.25', - os.path.join('/home/mwd24/lifetest/deltap/', scenario, '0.25', taxa), + os.path.join(data_dir, 'deltap', scenario, '0.25', taxa), ]) @@ -48,16 +49,23 @@ def main() -> None: required=True, dest="input_dir" ) + parser.add_argument( + '--datadir', + type=str, + help="directory for results", + required=True, + dest="data_dir", + ) parser.add_argument( '--output', type=str, help="name of output file for csv", - required=False, + required=True, dest="output" ) args = parser.parse_args() - species_generator(args.input_dir, args.output) + species_generator(args.input_dir, args.data_dir, args.output) if __name__ == "__main__": main() diff --git a/utils/speciesgenerator.py b/utils/speciesgenerator.py index 7c0f092..0a15d0a 100644 --- a/utils/speciesgenerator.py +++ b/utils/speciesgenerator.py @@ -8,6 +8,7 @@ def species_generator( input_dir: str, + data_dir: str, output_csv_path: str ): taxas = os.listdir(input_dir) @@ -20,13 +21,13 @@ def species_generator( speciess = os.listdir(taxa_path) for species in speciess: res.append([ - os.path.join('/home/mwd24/lifetest/habitat_maps', scenario), - '/home/mwd24/lifetest/elevation-max-1k.tif', - '/home/mwd24/lifetest/elevation-min-1k.tif', - '/home/mwd24/lifetest/area-per-pixel.tif', - '/home/mwd24/lifetest/crosswalk.csv', - os.path.join('/home/mwd24/lifetest/species-info/', 
taxa, source, species), - os.path.join('/home/mwd24/lifetest/aohs/', scenario, taxa) + os.path.join(os.path.join(data_dir, "habitat_maps"), scenario), + os.path.join(data_dir, "elevation-max-1k.tif"), + os.path.join(data_dir, "elevation-min-1k.tif"), + os.path.join(data_dir, "area-per-pixel.tif"), + os.path.join(data_dir, "crosswalk.csv"), + os.path.join(os.path.join(data_dir, "species-info/"), taxa, source, species), + os.path.join(os.path.join(data_dir, "aohs/"), scenario, taxa) ]) @@ -51,16 +52,23 @@ def main() -> None: required=True, dest="input_dir" ) + parser.add_argument( + '--datadir', + type=str, + help="directory for results", + required=True, + dest="data_dir", + ) parser.add_argument( '--output', type=str, help="name of output file for csv", - required=False, + required=True, dest="output" ) args = parser.parse_args() - species_generator(args.input_dir, args.output) + species_generator(args.input_dir, args.data_dir, args.output) if __name__ == "__main__": main() From 89da50fd40a95626719c638c06acb2af10acb457 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 29 Oct 2024 12:38:45 +0000 Subject: [PATCH 22/36] Make arable map match original work. --- prepare-layers/make_arable_map.py | 52 ++----------------------------- 1 file changed, 3 insertions(+), 49 deletions(-) diff --git a/prepare-layers/make_arable_map.py b/prepare-layers/make_arable_map.py index aaa3f9d..1288252 100644 --- a/prepare-layers/make_arable_map.py +++ b/prepare-layers/make_arable_map.py @@ -1,60 +1,23 @@ import argparse -import itertools -import os -import shutil -import tempfile from typing import Dict, List, Optional import numpy as np -import pandas as pd from alive_progress import alive_bar from yirgacheffe.layers import RasterLayer -# From Eyres et al: -# All natural terrestrial habitats and non-urban artificial habitats -IUCN_CODE_NATURAL = [ - "1", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9", - "2", "2.1", "2.2", - "3", "3.1", "3.2", "3.3", "3.4", "3.5", "3.6", "3.7", "3.8", - "4", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7", - "6", - "8", "8.1", "8.2", "8.3", - "14.1", "14.2", "14.3", "14.4", "14.6", # urban (14.5) removed - #"16", # Not in crosswalk due to iucn_modlib - "17", - #"18", # Not in crosswalk due to iucn_modlib -] -ARABLE = "14.1" - -def load_crosswalk_table(table_file_name: str) -> Dict[str,int]: - rawdata = pd.read_csv(table_file_name) - result = {} - for _, row in rawdata.iterrows(): - try: - result[row.code].append(int(row.value)) - except KeyError: - result[row.code] = [int(row.value)] - return result - +JUNG_ARABLE_CODE = 1401 +JUNG_URBAN_CODE = 1405 def make_arable_map( current_path: str, - crosswalk_path: str, output_path: str, concurrency: Optional[int], show_progress: bool, ) -> None: with RasterLayer.layer_from_file(current_path) as current: - crosswalk = load_crosswalk_table(crosswalk_path) - - map_replace_codes = list(set(list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_NATURAL])))) - print(map_replace_codes) - # arable_code = crosswalk[ARABLE][0] - arable_code = 1401 # This is a hack as Daniele's crosswalk has 14.1 mapped to both 1400 and 1401 and there's no logical way - # to understand this calc = current.numpy_apply( - lambda a: np.where(np.isin(a, map_replace_codes), arable_code, a) + lambda a: np.where(a != JUNG_URBAN_CODE, JUNG_ARABLE_CODE, a) ) with RasterLayer.empty_raster_layer_like( @@ -68,7 +31,6 @@ def make_arable_map( else: calc.parallel_save(result, parallelism=concurrency) - def main() -> None: parser = 
argparse.ArgumentParser(description="Generate the arable scenario map.") parser.add_argument( @@ -78,13 +40,6 @@ def main() -> None: required=True, dest='current_path', ) - parser.add_argument( - '--crosswalk', - type=str, - help='Path of map to IUCN crosswalk table', - required=True, - dest='crosswalk_path', - ) parser.add_argument( '--output', type=str, @@ -112,7 +67,6 @@ def main() -> None: make_arable_map( args.current_path, - args.crosswalk_path, args.results_path, args.concurrency, args.show_progress, From 150166a55ce162c3eeb41afd3689ae29e28b550a Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 31 Oct 2024 10:05:26 +0000 Subject: [PATCH 23/36] Build for full pipeline run --- deltap/delta_p_scaled_area.py | 4 ++ deltap/global_code_residents_pixel_AE_128.py | 17 +++-- prepare-species/extract_species_psql.py | 4 +- scripts/run.sh | 71 ++++++++++++-------- utils/persistencegenerator.py | 17 ++--- utils/raster_sum.py | 3 +- 6 files changed, 72 insertions(+), 44 deletions(-) diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py index 2e2d8d7..a308447 100644 --- a/deltap/delta_p_scaled_area.py +++ b/deltap/delta_p_scaled_area.py @@ -1,5 +1,6 @@ import argparse import os +import sys from glob import glob import numpy as np @@ -19,6 +20,9 @@ def delta_p_scaled_area( RasterLayer.layer_from_file(os.path.join(input_path, x)) for x in sorted(glob("*.tif", root_dir=input_path)) ] + if not per_taxa: + sys.exit(f"Failed to find any per-taxa maps in {input_path}") + area_restore = RasterLayer.layer_from_file(diff_area_map_path) for layer in per_taxa: diff --git a/deltap/global_code_residents_pixel_AE_128.py b/deltap/global_code_residents_pixel_AE_128.py index 24cecdc..079a83f 100644 --- a/deltap/global_code_residents_pixel_AE_128.py +++ b/deltap/global_code_residents_pixel_AE_128.py @@ -1,8 +1,10 @@ import argparse import math import os +import shutil import sys from enum import Enum +from tempfile import TemporaryDirectory import geopandas as gpd import numpy as np @@ -126,8 +128,12 @@ def global_code_residents_pixel_ae( old_persistence = calc_persistence_value(current_AOH, historic_AOH, z_exponent_func_float) print(old_persistence) calc = new_p_layer - ConstantLayer(old_persistence) - delta_p = RasterLayer.empty_raster_layer_like(new_p_layer, filename=os.path.join(output_folder, filename)) - calc.save(delta_p) + + with TemporaryDirectory() as tmpdir: + tmpfile = os.path.join(tmpdir, filename) + with RasterLayer.empty_raster_layer_like(new_p_layer, filename=tmpfile) as delta_p: + calc.save(delta_p) + shutil.move(tmpfile, os.path.join(output_folder, filename)) case Season.NONBREEDING: nonbreeding_filename = f"{taxid}_{Season.NONBREEDING.name}.tif" @@ -202,8 +208,11 @@ def global_code_residents_pixel_ae( delta_p_layer = new_p_layer - ConstantLayer(old_persistence) - output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=os.path.join(output_folder, nonbreeding_filename)) - delta_p_layer.save(output) + with TemporaryDirectory() as tmpdir: + tmpfile = os.path.join(tmpdir, nonbreeding_filename) + with RasterLayer.empty_raster_layer_like(new_p_breeding, filename=tmpfile) as output: + delta_p_layer.save(output) + shutil.move(tmpfile, os.path.join(output_folder, nonbreeding_filename)) case Season.BREEDING: pass # covered by the nonbreeding case diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py index fcf314a..8795ead 100644 --- a/prepare-species/extract_species_psql.py +++ b/prepare-species/extract_species_psql.py @@ 
-172,7 +172,6 @@ def process_row(
             continue
         habitat_set = set([x for x in habitat_values.split('|')])
         if len(habitat_set) == 0:
-            logger.debug("Dropping %s: No habitats", id_no)
             continue
         if any([x.startswith('7') for x in habitat_set]):
             logger.debug("Dropping %s: Habitat 7 in habitat list", id_no)
@@ -184,12 +183,13 @@ def process_row(
             habitats[season_code] = habitat_set
 
     if len(habitats) == 0:
+        logger.debug("Dropping %s: No habitats", id_no)
         return
 
     cursor.execute(GEOMETRY_STATEMENT, (assessment_id, presence))
     geometries_data = cursor.fetchall()
     if len(geometries_data) == 0:
-        logger.info("Dropping %s: no habitats", id_no)
+        logger.info("Dropping %s: no geometries", id_no)
         return
     geometries = {}
     for season, geometry in geometries_data:
diff --git a/scripts/run.sh b/scripts/run.sh
index 7fa7b99..60d8aa2 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -7,9 +7,22 @@
 # https://github.com/quantifyearth/reclaimer - used to download inputs from Zenodo directly
 # https://github.com/quantifyearth/littlejohn - used to run batch jobs in parallel
 
-
 set -e
 
+if [ -z "${DATADIR}" ]; then
+    echo "Please specify DATADIR"
+    exit 1
+fi
+
+
+if [ -z "${VIRTUAL_ENV}" ]; then
+    echo "Please run inside a virtualenv"
+    exit 1
+fi
+
+# declare -a CURVES=("0.1" "0.25" "0.5" "1.0" "gompertz")
+declare -a CURVES=("1.0" "gompertz")
+
 # Get habitat layer and prepare for use
 reclaimer zenodo --zenodo_id 4058819 \
@@ -35,7 +48,6 @@ python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output ${D
 
 # Generate the arable scenario map
 python3 ./prepare-layers/make_arable_map.py --current ${DATADIR}/habitat/current_raw.tif \
-                                            --crosswalk ${DATADIR}/crosswalk.csv \
                                             --output ${DATADIR}/habitat/arable.tif
 
 python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/arable.tif \
--output ${DATADIR}/habitat_maps/arable/ python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \ - --scenario ${DATADIR}/habitat/restore.tif \ + --scenario ${DATADIR}/habitat/arable.tif \ --area ${DATADIR}/area-per-pixel.tif \ --scale 0.016666666666667 \ - --output ${DATADIR}/habitat/restore_diff_area.tif + --output ${DATADIR}/habitat/arable_diff_area.tif # Generate the restore map python3 ./prepare-layers/make_restore_map.py --pnv ${DATADIR}/habitat/pnv_raw.tif \ @@ -59,10 +71,10 @@ python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/restore --output ${DATADIR}/habitat_maps/restore/ python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \ - --scenario ${DATADIR}/habitat/arable.tif \ + --scenario ${DATADIR}/habitat/restore.tif \ --area ${DATADIR}/area-per-pixel.tif \ --scale 0.016666666666667 \ - --output ${DATADIR}/habitat/arable_diff_area.tif + --output ${DATADIR}/habitat/restore_diff_area.tif # Fetch and prepare the elevation layers reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output ${DATADIR}/elevation.tif @@ -80,27 +92,30 @@ python3 ./utils/speciesgenerator.py --input ${DATADIR}/species-info --datadir ${ python3 ./utils/persistencegenerator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/persistencebatch.csv # Calculate all the AoHs -littlejohn -j 200 -c ${DATADIR}/aohbatch.csv ${PWD}/venv/bin/python3 -- ./aoh-calculator/aohcalc.py --force-habitat +littlejohn -j 200 -o ${DATADIR}/aohbatch.log -c ${DATADIR}/aohbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./aoh-calculator/aohcalc.py --force-habitat # Calculate the per species Delta P values -littlejohn -j 200 -c ${DATADIR}/persistencebatch.csv ${PWD}/venv/bin/python3 -- ./deltap/global_code_residents_pixel_AE_128.py - -# Per scenario per taxa sum the delta Ps -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/REPTILIA/ --output ${DATADIR}/deltap_sum/arable/0.25/REPTILIA.tif -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/AVES/ --output ${DATADIR}/deltap_sum/arable/0.25/AVES.tif -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/MAMMALIA/ --output ${DATADIR}/deltap_sum/arable/0.25/MAMMALIA.tif -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/0.25/AMPHIBIA/ --output ${DATADIR}/deltap_sum/arable/0.25/AMPHIBIA.tif - -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/MAMMALIA/ --output ${DATADIR}/deltap_sum/restore/0.25/MAMMALIA.tif -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/AMPHIBIA/ --output ${DATADIR}/deltap_sum/restore/0.25/AMPHIBIA.tif -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/REPTILIA/ --output ${DATADIR}/deltap_sum/restore/0.25/REPTILIA.tif -python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/0.25/AVES/ --output ${DATADIR}/deltap_sum/restore/0.25/AVES.tif - -# Generate final map -python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/restore/0.25/ \ - --diffmap ${DATADIR}/habitat/restore_diff_area.tif \ - --output ${DATADIR}/deltap_final/scaled_restore_0.25.tif - -python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/arable/0.25/ \ - --diffmap ${DATADIR}/habitat/arable_diff_area.tif \ - --output ${DATADIR}/deltap_final/scaled_arable_0.25.tif +littlejohn -j 200 -o ${DATADIR}/persistencebatch.log -c 
${DATADIR}/persistencebatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./deltap/global_code_residents_pixel_AE_128.py + +for CURVE in "${CURVES[@]}" +do + # Per scenario per taxa sum the delta Ps + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/REPTILIA/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/REPTILIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/AVES/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/AVES.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/MAMMALIA/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/MAMMALIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/arable/${CURVE}/AMPHIBIA/ --output ${DATADIR}/deltap_sum/arable/${CURVE}/AMPHIBIA.tif + + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/${CURVE}/MAMMALIA/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/MAMMALIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/${CURVE}/AMPHIBIA/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/AMPHIBIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/${CURVE}/REPTILIA/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/REPTILIA.tif + python3 ./utils/raster_sum.py --rasters_directory ${DATADIR}/deltap/restore/${CURVE}/AVES/ --output ${DATADIR}/deltap_sum/restore/${CURVE}/AVES.tif + + # Generate final map + python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/restore/${CURVE}/ \ + --diffmap ${DATADIR}/habitat/restore_diff_area.tif \ + --output ${DATADIR}/deltap_final/scaled_restore_${CURVE}.tif + + python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/arable/${CURVE}/ \ + --diffmap ${DATADIR}/habitat/arable_diff_area.tif \ + --output ${DATADIR}/deltap_final/scaled_arable_${CURVE}.tif +done \ No newline at end of file diff --git a/utils/persistencegenerator.py b/utils/persistencegenerator.py index 7a6cf34..49cd467 100644 --- a/utils/persistencegenerator.py +++ b/utils/persistencegenerator.py @@ -19,14 +19,15 @@ def species_generator( speciess = os.listdir(taxa_path) for scenario in ['arable', 'restore']: for species in speciess: - res.append([ - os.path.join(data_dir, 'species-info', taxa, 'current', species), - os.path.join(data_dir, 'aohs', 'current', taxa), - os.path.join(data_dir, 'aohs', scenario, taxa), - os.path.join(data_dir, 'aohs', 'pnv', taxa), - '0.25', - os.path.join(data_dir, 'deltap', scenario, '0.25', taxa), - ]) + for curve in ["0.1", "0.25", "0.5", "1.0", "gompertz"]: + res.append([ + os.path.join(data_dir, 'species-info', taxa, 'current', species), + os.path.join(data_dir, 'aohs', 'current', taxa), + os.path.join(data_dir, 'aohs', scenario, taxa), + os.path.join(data_dir, 'aohs', 'pnv', taxa), + curve, + os.path.join(data_dir, 'deltap', scenario, curve, taxa), + ]) df = pd.DataFrame(res, columns=[ diff --git a/utils/raster_sum.py b/utils/raster_sum.py index 9a85f3d..0165b97 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -55,8 +55,7 @@ def build_k( files = [os.path.join(images_dir, x) for x in glob.glob("*.tif", root_dir=images_dir)] if not files: - print(f"No files in {images_dir}, aborting", file=sys.stderr) - sys.exit(-1) + sys.exit(f"No files in {images_dir}, aborting") with tempfile.TemporaryDirectory() as tempdir: with Manager() as manager: From 0155662e60de0fbf48f3594eefaeaf15797cd393 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 31 Oct 2024 10:06:40 +0000 Subject: [PATCH 24/36] 
Remove unused scripts --- utils/downsample.py | 84 --------------------- utils/enumerate_habitats.py | 55 -------------- utils/readmeta.py | 43 ----------- utils/unionsum.py | 58 --------------- utils/vt316generator.py | 141 ------------------------------------ 5 files changed, 381 deletions(-) delete mode 100644 utils/downsample.py delete mode 100644 utils/enumerate_habitats.py delete mode 100644 utils/readmeta.py delete mode 100644 utils/unionsum.py delete mode 100644 utils/vt316generator.py diff --git a/utils/downsample.py b/utils/downsample.py deleted file mode 100644 index 2376a72..0000000 --- a/utils/downsample.py +++ /dev/null @@ -1,84 +0,0 @@ -from math import ceil, floor -import sys - -import numpy as np -from yirgacheffe.layers import RasterLayer, PixelScale - -from osgeo import gdal - -gdal.SetCacheMax(1024 * 1024 * 16) - -target_scale = PixelScale(0.083333333333333, -0.083333333333333) - -try: - source = RasterLayer.layer_from_file(sys.argv[1]) - target_name = sys.argv[2] # pylint: disable=C0103 -except IndexError: - print(f"Usage: {sys.argv[0]} [SRC] [DEST]", file=sys.stderr) - sys.exit(1) - -target = RasterLayer.empty_raster_layer( - area=source.area, - scale=target_scale, - datatype=source.datatype, - filename=target_name, - projection=source.projection -) - -pixels_per_x = source.window.xsize / target.window.xsize -pixels_per_y = source.window.ysize / target.window.ysize - -for y in range(target.window.ysize): - # read all the pixels that will overlap with this row from source - low_y = floor(y * pixels_per_y) - high_y = ceil((y+1) * pixels_per_y) - - band_height = high_y - low_y - band = source.read_array(0, low_y, source.window.xsize, high_y - low_y) - - dest = np.zeros((1, target.window.xsize)) - for x in range(target.window.xsize): - - low_x = floor(x * pixels_per_x) - high_x = ceil((x+1) * pixels_per_x) - - total = np.sum(band[1:band_height - 1, low_x+1:high_x - 1]) - - - # Work out the scaling factors for the sides - - first_y = float(low_y + 1) - (y * pixels_per_y) - assert 0.0 <= first_y <= 1.0 - last_y = ((y + 1) * pixels_per_y) - float(high_y - 1) - assert 0.0 <= last_y <= 1.0 - - first_x = float(low_x + 1) - (x * pixels_per_x) - assert 0.0 <= first_x <= 1.0 - last_x = ((x + 1) * pixels_per_x) - float(high_x - 1) - assert 0.0 <= last_x <= 1.0 - - # major sides - total += np.sum(band[1:band_height - 1, low_x:low_x+1]) * first_y - total += np.sum(band[1:band_height - 1, high_x - 2:high_x - 1]) * last_y - - total += np.sum(band[0][low_x+1:high_x - 1]) * first_x - total += np.sum(band[band_height - 1][low_x + 1:high_x - 1]) * last_x - - # corners - total += band[0][low_x] * first_x * first_y - total += band[band_height - 1][low_x] * first_x * last_y - total += band[0][high_x - 1] * last_x * first_y - total += band[band_height - 1][high_x - 1] * last_x * last_y - - dest[0][x] = total - - - target._dataset.GetRasterBand(1).WriteArray(dest, 0, y) # pylint: disable=W0212 - - -before = source.sum() -after = target.sum() - -print(f"before: {before}") -print(f"after: {after}") -print(f"diff: {((after - before)/before) * 100.0}") diff --git a/utils/enumerate_habitats.py b/utils/enumerate_habitats.py deleted file mode 100644 index 599aaf5..0000000 --- a/utils/enumerate_habitats.py +++ /dev/null @@ -1,55 +0,0 @@ -import argparse -import logging -from functools import partial -from multiprocessing import Pool, cpu_count -from typing import Set - -from yirgacheffe.layers import RasterLayer # type: ignore - -logger = logging.getLogger(__name__) 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s') - -BLOCKSIZE = 512 - -def enumerate_subset( - habitat_path: str, - offset: int, -) -> Set[int]: - with RasterLayer.layer_from_file(habitat_path) as habitat_map: - blocksize = min(BLOCKSIZE, habitat_map.window.ysize - offset) - data = habitat_map.read_array(0, offset, habitat_map.window.xsize, blocksize) - values = data.flatten() - res = set(values) - return res - -def enumerate_terrain_types( - habitat_path: str -) -> Set[int]: - with RasterLayer.layer_from_file(habitat_path) as habitat_map: - ysize = habitat_map.window.ysize - blocks = range(0, ysize, BLOCKSIZE) - logger.info("Enumerating habitat classes in raster...") - with Pool(processes=int(cpu_count() / 2)) as pool: - sets = pool.map(partial(enumerate_subset, habitat_path), blocks) - superset = set() - for s in sets: - superset.update(s) - logger.info(superset) - -def main() -> None: - parser = argparse.ArgumentParser(description="Downsample habitat map to raster per terrain type.") - parser.add_argument( - '--map', - type=str, - help="Initial habitat.", - required=True, - dest="habitat_path" - ) - args = parser.parse_args() - - enumerate_terrain_types( - args.habitat_path, - ) - -if __name__ == "__main__": - main() diff --git a/utils/readmeta.py b/utils/readmeta.py deleted file mode 100644 index 6fb2c10..0000000 --- a/utils/readmeta.py +++ /dev/null @@ -1,43 +0,0 @@ -import json -import sys -import time - -import pyarrow.parquet as pq - -def main() -> None: - if len(sys.argv) != 2: - print(f"Usage: {sys.argv[0]}", file=sys.stderr) - sys.exit(1) - - file = pq.read_table(sys.argv[1]) - metadata = file.schema.metadata - - try: - arkmetadata = metadata[b"experiment"] - except KeyError: - print("No ARK metadata on this file", file=sys.stderr) - sys.exit(1) - - try: - info = json.loads(arkmetadata) - except ValueError as exc: - print("Unable to decode ARK metadata: %e", exc, file=sys.stderr) - sys.exit(1) - - keys = list(info.keys()) - keys.sort() - maxlen = 0 - for k in keys: - if len(k) > maxlen: - maxlen = len(k) - - for k in keys: - if k == 'timestamp': - val = time.ctime(info[k]) - else: - val = info[k] - - print(f'{k}{" " * (maxlen - len(k))}\t{val}') - -if __name__ == "__main__": - main() diff --git a/utils/unionsum.py b/utils/unionsum.py deleted file mode 100644 index 49166ed..0000000 --- a/utils/unionsum.py +++ /dev/null @@ -1,58 +0,0 @@ -from math import ceil -import os -import shutil -import sys -import tempfile - -from osgeo import gdal -from yirgacheffe.layers import Layer - -def main(): - layers = [Layer.layer_from_file(x) for x in sys.argv[1:]] - area = Layer.find_union(layers) - - for layer in layers: - layer.set_window_for_union(area) - pixel_pitch = layers[0].pixel_scale - - driver = gdal.GetDriverByName('GTiff') - with tempfile.TemporaryDirectory() as tempdir: - tmp_filename = os.path.join(tempdir, "results.tif") - - dataset = driver.Create( - tmp_filename, - ceil((area.right - area.left) / pixel_pitch[0]), - ceil((area.top - area.bottom) / (pixel_pitch[1] * -1)), - 1, - gdal.GDT_Float32, - [] - ) - if dataset is None: - print(f"Failed to create {tmp_filename}") - sys.exit(-1) - - dataset.SetGeoTransform([ - area.left, pixel_pitch[0], 0.0, area.top, 0.0, pixel_pitch[1] - ]) - dataset.SetProjection(layers[0].projection) - - output_band = dataset.GetRasterBand(1) - pixel_width = layers[0].window.xsize - pixel_height = layers[0].window.ysize - - for yoffset in range(pixel_height): - first = layers[0].read_array(0, yoffset, 
pixel_width, 1) - for other_layer in layers[1:]: - other = other_layer.read_array(0, yoffset, pixel_width, 1) - first = first + other - # Uncomment the below line to help see everything in QGIS - # first = numpy.logical_and(first > 0.0, True) - output_band.WriteArray(first, 0, yoffset) - - # Force a close on the dataset and move it to the final location - del dataset - shutil.move(tmp_filename, "result.tif") - - -if __name__ == "__main__": - main() diff --git a/utils/vt316generator.py b/utils/vt316generator.py deleted file mode 100644 index 8948bb1..0000000 --- a/utils/vt316generator.py +++ /dev/null @@ -1,141 +0,0 @@ -import argparse -import contextlib -import json -import sys -from typing import Set -import seasonality -#from aoh.lib import seasonality -from iucn_modlib.classes.Taxon import Taxon -from iucn_modlib.factories import TaxonFactories - -@contextlib.contextmanager -def file_writer(file_name = None): - writer = open(file_name, "w", encoding="utf-8") if file_name is not None else sys.stdout - yield writer - if file_name: - writer.close() - -def seasonality_for_species(species: Taxon, range_file: str) -> Set[str]: - og_seasons = set( - seasonality.habitatSeasonality(species) + - seasonality.rangeSeasonality(range_file, species.taxonid) - ) - if len(og_seasons) == 0: - return {} - seasons = {'resident'} - if len(og_seasons.difference({'resident'})) > 0: - seasons = {'breeding', 'nonbreeding'} - return seasons - -def main() -> None: - parser = argparse.ArgumentParser(description="Species and seasonality generator.") - parser.add_argument( - '--experiment', - type=str, - help="name of experiment group from configuration json", - required=True, - dest="experiment" - ) - parser.add_argument( - '--config', - type=str, - help="path of configuration json", - required=False, - dest="config_path", - default="config.json" - ) - parser.add_argument( - '--list', - type=str, - help="list of all species", - required=True, - dest="list" - ) - parser.add_argument( - '--output', - type=str, - help="name of output file for csv", - required=False, - dest="output" - ) - parser.add_argument( - '--epochs', - type=str, - help="comma seperated (but no spaces!) list of experiments to run for", - required=True, - dest="epochs" - ) - parser.add_argument( - '--class', - type=str, - help="Options are 'MAMMALIA', 'AVES', 'AMPHIBIA' and 'REPTILIA'", - required=True, - dest="class" - ) - - args = vars(parser.parse_args()) - - try: - with open(args['config_path'], 'r', encoding='utf-8') as config_file: - config = json.load(config_file) - except FileNotFoundError: - print(f'Failed to find configuration json file {args["config_path"]}') - sys.exit(-1) - except json.decoder.JSONDecodeError as exc: - print(f'Failed to parse {args["config_path"]} at line {exc.lineno}, column {exc.colno}: {exc.msg}') - sys.exit(-1) - - try: - experiment = config['experiments'][args['experiment']] - except KeyError: - if not 'experiments' in config: - print("No experiments section founnd in configuration json") - else: - print(f'Failed to find experiment with name {args["experiment"]}. 
Options found:') - for experiment in config['experiments']: - print(f'\t{experiment}') - sys.exit(-1) - - epoch_list = args['epochs'].split(',') - - try: - range_path = experiment['range'] - except KeyError: - print(f'Experiment "{args["experiment"]}" was missing range key.') - - batch = None - if 'iucn_batch' in experiment: - batch = TaxonFactories.loadBatchSource(experiment['iucn_batch']) - - # Work part 1: get the species list - with open(args["list"], "r", encoding="utf-8") as listfile: - all_species = listfile.readlines() - - species_class = args['class'] - - species_list = [int(x.split(',')[1]) for x in all_species if species_class in x] - - with file_writer(args["output"]) as output: - output.write('--taxid,--seasonality,--experiment\n') - for species_id in species_list: - if batch: - try: - species = TaxonFactories.TaxonFactoryRedListBatch(species_id, batch) - except IndexError: - # Some of the data in the batch needs tidy... - print(f'{species_id} not in batch') - continue - else: - try: - species = TaxonFactories.TaxonFactoryRedListAPI(species_id, config['iucn']['api_key']) - except KeyError: - print("Failed to find IUCN API key in config file or batch path in experiment.") - sys.exit(-1) - - seasonality_list = seasonality_for_species(species, range_path) - for season in seasonality_list: - for epoch in epoch_list: - output.write(f'{species_id},{season},{epoch}\n') - -if __name__ == "__main__": - main() From 95b590cc7547ed39d6db55602f3999b2f301a3d6 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 31 Oct 2024 10:08:06 +0000 Subject: [PATCH 25/36] More remove old scripts --- deltap/delta_P_hectare.r | 111 ---------- deltap/gcrgen.py | 93 --------- deltap/global_code_residents_pixel_AE_64.py | 219 -------------------- 3 files changed, 423 deletions(-) delete mode 100644 deltap/delta_P_hectare.r delete mode 100644 deltap/gcrgen.py delete mode 100644 deltap/global_code_residents_pixel_AE_64.py diff --git a/deltap/delta_P_hectare.r b/deltap/delta_P_hectare.r deleted file mode 100644 index 8ed009e..0000000 --- a/deltap/delta_P_hectare.r +++ /dev/null @@ -1,111 +0,0 @@ -# Converts the cumulative deltaP rasters to values / hectare land use change -# NOT VERY EFFICIENT- THERE ARE PROBALY MUCH FASTER WAYS -rm(list = ls()) -# Dependancies -library(raster) -library(terra) -library(gdata) - -# Function to clean up the delta_P rasters and save them: -# - -# INPUTS: FOLDER TO RASTERS -# LUC RASTER -# OUTPUT: RASTER STACK -process_rasters <- function(data_dir, area_restore_path) { - # List raster files in the directory - rastlist <- list.files(path = data_dir, pattern = '.tif$', all.files = TRUE, full.names = TRUE) - # Read the area_restore raster - area_restore <- rast(area_restore_path) - # Filter area_restore - area_restore_filter <- ifel(area_restore < 1e4, 0, area_restore) - area_restore_filter <- area_restore_filter / 1e4 - - # Create a blank raster - blank_raster <- rast(area_restore_filter) - values(blank_raster) <- 0 - # Initialize an empty stack - stack <- rast() - for (i in 1:length(rastlist)) { - rast_i <- rast(rastlist[i]) - # Mosaic blank_raster and rast_i, and set names - rast_i_extent <- mosaic(blank_raster, rast_i, fun = "sum") - # Only keep values where LUC > hectare - rast_i_extent <- ifel(area_restore_filter != 0, rast_i_extent, 0) - rast_i_extent_ha <- rast_i_extent / area_restore_filter - # Set names - names(rast_i_extent_ha) <- names(rast_i) - # Append to the stack - stack <- append(stack, rast_i_extent_ha) - } - - return(stack) -} - -# WRITE THE 0.25 restore and 
arable rasters -results_stack<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/0.25","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(results_stack, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_0.25.tif") - -min(results_stack$amphibians) -min(results_stack$birds) - -keep(process_rasters,sure = TRUE) -results_stack <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/0.25","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -plot(-1*(log(results_stack$birds)) -# MIGHT BE A PROBLEM WITH THIS?> -writeRaster(results_stack, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_0.25.tif") -min(results_stack) -plot(results_stack$amphibians) - -## WRITE OTHERS IF I WANT - -# RESTORE -# 0.1 -restore_0.1<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/0.1","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_0.1, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_0.1.tif") -min(results_stack$amphibians) -min(results_stack$birds) - -keep(process_rasters,sure = TRUE) -# 0.5 -restore_0.5<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/0.5","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_0.5, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_0.5.tif") - - -keep(process_rasters,sure = TRUE) -#1.0 -restore_1<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/1.0","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_1, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_1.0.tif") -keep(process_rasters,sure = TRUE) -#gompertz -restore_gompertz<-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/restore/gompertz","/maps/results/global_analysis/rasters/area_1_arc/area/diff_restore_area.tif") -writeRaster(restore_gompertz, filename = "/maps/results/global_analysis/outputs_mwd_processed/restore/restore_gompertz.tif") - - -# Conserve -# 0.1 -keep(process_rasters,sure = TRUE) -arable_0.1 <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/0.1","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -hist(log(arable_0.1$birds)) -writeRaster(arable_0.1, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_0.1.tif") - - -# 0.5 -keep(process_rasters,sure = TRUE) -arable_0.5 <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/0.5","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -writeRaster(arable_0.5, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_0.5.tif") - - - - -# 1,0 -keep(process_rasters,sure = TRUE) -arable_1 <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/1.0","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") -writeRaster(arable_1, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_1.0.tif") - - - -# Gompertz -keep(process_rasters,sure = TRUE) -arable_gompertz <-process_rasters("/maps/results/global_analysis/outputs_mwd_summarised/arable/gompertz","/maps/results/global_analysis/rasters/area_1_arc/area/arable_diff_area.tif") 
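Stripped of the raster plumbing, the rule this R script applies is: mask out cells with less than a hectare of land-use change, then divide the cumulative deltaP by the changed area in hectares. The same rule in Python terms, as a sketch over plain arrays (hypothetical names, not the terra calls above):

import numpy as np

def delta_p_per_hectare(delta_p, change_area_m2):
    # Cells under the 1 ha threshold are zeroed, matching the
    # ifel(area_restore < 1e4, 0, area_restore) filter above.
    area_ha = np.where(change_area_m2 < 1e4, 0.0, change_area_m2) / 1e4
    return np.divide(delta_p, area_ha,
                     out=np.zeros_like(delta_p), where=area_ha != 0.0)

print(delta_p_per_hectare(np.array([0.5, 0.2]), np.array([2e4, 5e3])))
# [0.25 0.  ] -- only the first cell clears the one-hectare threshold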
-writeRaster(arable_gompertz, filename = "/maps/results/global_analysis/outputs_mwd_processed/arable/arable_gompertz.tif") diff --git a/deltap/gcrgen.py b/deltap/gcrgen.py deleted file mode 100644 index e835c40..0000000 --- a/deltap/gcrgen.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Generate list of args for use with littlejohn and global_code_residents_pixel.py - -Note this works for my file structure specifically to avoid a crazy amount of -messy kwargs, but can be adapted fairly easily. - -""" - -import argparse -import os - -import pandas as pd - -parser = argparse.ArgumentParser(description="") -parser.add_argument('--target_dir', - type=str,help="Look for folders called 'search' in this directory", - required=True,dest="target_dir") -parser.add_argument('--scenario', required=True, dest="scenario") -parser.add_argument('--output_dir', - type = str, help = "where to save the csv", - required = True, dest = "output_dir") -args = vars(parser.parse_args()) - - -classes = ["birds", "mammals", "amphibians", "reptiles"] -z_values = ["gompertz"] -season = "RESIDENT" -habmaps = {"historic" : "pnv", - "scenario" : args["scenario"], - "current" : "current_L1L2" - } -habmaps_r = {v: k for k, v in habmaps.items()} - - -target_dir = args["target_dir"] - -# for z in z_values: -# for taxa in classes: -# os.makedirs(os.path.join(args["output_dir"], args["scenario"], str(z), taxa), exist_ok=True) -os.makedirs(args["output_dir"], exist_ok=True) - -tif_file_paths = [] -for path, subdirs, files in os.walk(args["target_dir"]): - for name in files: - _, ext = os.path.splitext(name) - if ext == '.tif': - tif_file_paths.append(os.path.join(path, name)) - -df = pd.DataFrame() -index_levels = ["taxid", "season", "taxclass"] -df.index = pd.MultiIndex(levels=[[]] * len(index_levels), codes=[[]] * len(index_levels), names=index_levels) - -for i, file in enumerate(tif_file_paths): - # print("Reading in files: ", round(i/len(tif_file_paths), 4), end = "\r" ) - - path, fname = os.path.split(file) - taxid = fname.split("-")[-1].split(".")[0] - season = fname.split("-")[0].split(".")[-1] - c1 = 0 - for tc in classes: - if tc in path: - taxclass = tc - c1 += 1 - c2 = 0 - for hmap in habmaps.values(): - if hmap in path: - habmap = hmap - c2 += 1 - if c1 == 1 and c2 == 1: - df.loc[(taxid, season, taxclass), habmaps_r[habmap]] = file -df = df.reset_index() -if "historic" not in df.columns: - df['historic'] = "nan" - -filename = os.path.join(args["output_dir"], f"g_file_index_{args['scenario']}_lj.csv") -with open(filename, "w+") as out_file: - out_file.write("--current_path,--scenario_path,--historic_path,--output_path,--z") - out_file.write("\n") - - for i, (idx, row) in enumerate(df.iterrows()): - for z in z_values: - print(f"Writing littlejohn arguments to {filename}: ", round(i/len(df), 4), end = "\r" ) - curr = row.current - scen = row.scenario - hist = row.historic - ofname = f"Seasonality.{row.season}-{row.taxid}.tif" - of = os.path.join(args["output_dir"], args["scenario"], str(z), row.taxclass, ofname) - - out_file.write(f"{curr},{scen},{hist},{of},{str(z)}") - out_file.write("\n") - - - \ No newline at end of file diff --git a/deltap/global_code_residents_pixel_AE_64.py b/deltap/global_code_residents_pixel_AE_64.py deleted file mode 100644 index 0743885..0000000 --- a/deltap/global_code_residents_pixel_AE_64.py +++ /dev/null @@ -1,219 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed May 17 15:51:13 2023 - -@authors: Thomas Ball, Ali Eyres - -This is a modified version of global_code_residents.py that calculates 
delta p for a set of -Rasters. Any resolution should work since it just uses x/y as identifiers. - -# AE modified from TB's Code for CSVs -# Not sure if it works... - -This isn't tidied or commented properly -""" -import argparse -import os -import math -import re -import sys -import warnings - -# warnings.simplefilter("error") - -import pandas as pd -import numpy as np -from osgeo import gdal -from yirgacheffe.layers import RasterLayer, ConstantLayer - - -quiet = False -overwrite = True - -gompertz_a = 2.5 -gompertz_b = -14.5 -gompertz_alpha = 1 - -def gen_gompertz(x,): - return math.exp(-math.exp(gompertz_a + (gompertz_b * (x ** gompertz_alpha)))) - -def numpy_gompertz(x): - return np.exp(-np.exp(gompertz_a + (gompertz_b * (x ** gompertz_alpha)))) - -parser = argparse.ArgumentParser() -parser.add_argument( - '--current_path', - type=str, - required=True, - dest="current_path", - help="path to species current AOH hex" -) -parser.add_argument( - '--historic_path', - type=str, - required=False, - dest="historic_path", - help="path to species historic AOH hex" -) -parser.add_argument( - '--scenario_path', - type=str, - required=True, - dest="scenario_path", - help="path to species scenario AOH hex" -) -parser.add_argument('--output_path', - type=str, - required=True, - dest="output_path", - help="path to save output csv" -) -parser.add_argument('--z', dest='exponent', type=str, default='0.25') -parser.add_argument('-ht', '--hist_table', - dest = "hist_table", - type = str) -args = vars(parser.parse_args()) - -try: - exp_val = float(args['exponent']) - z_exponent_func_float = lambda x: np.float_power(x, exp_val) - z_exponent_func_raster = lambda x: np.float_power(x, exp_val) -except ValueError: - if args['exponent'] == "gompertz": - z_exponent_func_float = gen_gompertz - z_exponent_func_raster = numpy_gompertz - else: - quit(f"unrecognised exponent {args['exponent']}") - -if (not 'historic_path' in args.keys()) and (not 'hist_table' in args.keys()): - quit("Please provide either historic_path or hist_table arguments") - -if not overwrite and os.path.isfile(args['output_path']): - quit(f"{args['output_path']} exists, set overwrite to False to ignore this.") -path, _ = os.path.split(args["output_path"]) -os.makedirs(path, exist_ok=True) - -FILERE = re.compile(r'.*Seasonality.(\w+)-(\d+).tif$') -season, taxid = FILERE.match(args['current_path']).groups() -season = season.lower() - - -def open_layer_as_float64(filename: str) -> RasterLayer: - if filename == "nan": - return ConstantLayer(0.0) - layer = RasterLayer.layer_from_file(filename) - if layer.datatype == gdal.GDT_Float64: - return layer - layer64 = RasterLayer.empty_raster_layer_like(layer, datatype=gdal.GDT_Float64) - layer.save(layer64) - return layer64 - - -def calc_persistence_value(current_AOH: float, historic_AOH: float, exponent_func) -> float: - sp_P = exponent_func(current_AOH / historic_AOH) - sp_P_fix = np.where(sp_P > 1, 1, sp_P) - return sp_P_fix - - -def process_delta_p(current: RasterLayer, scenario: RasterLayer, current_AOH: float, historic_AOH: float) -> RasterLayer: - # In theory we could recalc current_AOH, but given we already have it don't duplicate work - # New section added in: Calculating for rasters rather than csv's - const_layer = ConstantLayer(current_AOH) # MAKE A LAYER WITH THE SAME PROPERTIES AS CURRENT AOH RASTER BUT FILLED WITH THE CURRENT AOH - calc_1 = (const_layer - current) + scenario # FIRST CALCULATION : NEW AOH - new_AOH = RasterLayer.empty_raster_layer_like(current) - calc_1.save(new_AOH) - - calc_2 
= (new_AOH / historic_AOH).numpy_apply(z_exponent_func_raster) - calc_2 = calc_2.numpy_apply(lambda chunk: np.where(chunk > 1, 1, chunk)) - new_p = RasterLayer.empty_raster_layer_like(new_AOH) - calc_2.save(new_p) - - return new_p - - -hdf = pd.read_csv(args['hist_table']) - -if season == 'resident': - try: - current = open_layer_as_float64(args['current_path']) - scenario = open_layer_as_float64(args['scenario_path']) - except FileNotFoundError as fnf: - quit(f"Failed to open {fnf.filename}") - - layers = [current, scenario] - union = RasterLayer.find_union(layers) - for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass - - current_AOH = current.sum() - historic_AOH = hdf[(hdf.id_no == int(taxid))&(hdf.season == " " + season)].AOH.values[0] - if historic_AOH == 0.0: - quit(f"Historic AoH for {taxid} is zero, aborting") - - new_p_layer = process_delta_p(current, scenario, current_AOH, historic_AOH) - - old_persistence = calc_persistence_value(current_AOH, historic_AOH, z_exponent_func_float) - calc = new_p_layer - ConstantLayer(old_persistence) - delta_p = RasterLayer.empty_raster_layer_like(new_p_layer, filename=args['output_path']) - calc.save(delta_p) - -elif season == 'nonbreeding': - # We have the nonbreeding path, work out the breeding path, check that works, and then do the work. - non_breeding_current_path = args['current_path'] - directory, _ = os.path.split(non_breeding_current_path) - breeding_current_path = os.path.join(directory, f'Seasonality.BREEDING-{taxid}.tif') - - non_breeding_scenario_path = args['scenario_path'] - if non_breeding_scenario_path != "nan": - assert 'NONBREEDING' in non_breeding_scenario_path - directory, _ = os.path.split(non_breeding_scenario_path) - breeding_scenario_path = os.path.join(directory, f'Seasonality.BREEDING-{taxid}.tif') - else: - breeding_scenario_path = non_breeding_scenario_path - - try: - current_breeding = open_layer_as_float64(breeding_current_path) - current_non_breeding = open_layer_as_float64(non_breeding_current_path) - scenario_breeding = open_layer_as_float64(breeding_scenario_path) - scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) - except FileNotFoundError as fnf: - quit(f"Failed to open {fnf.filename}") - - layers = [current_breeding, current_non_breeding, scenario_breeding, scenario_non_breeding] - union = RasterLayer.find_union(layers) - for layer in layers: - try: - layer.set_window_for_union(union) - except ValueError: - pass - - - current_AOH_breeding = current_breeding.sum() - historic_AOH_breeding = hdf[(hdf.id_no == int(taxid))&(hdf.season == " " + 'breeding')].AOH.values[0] - if historic_AOH_breeding == 0.0: - quit(f"Historic AoH breeding for {taxid} is zero, aborting") - persistence_breeding = calc_persistence_value(current_AOH_breeding, historic_AOH_breeding, z_exponent_func_float) - - current_AOH_non_breeding = current_non_breeding.sum() - historic_AOH_non_breeding = hdf[(hdf.id_no == int(taxid))&(hdf.season == " " + 'nonbreeding')].AOH.values[0] - if historic_AOH_non_breeding == 0.0: - quit(f"Historic AoH for non breeding {taxid} is zero, aborting") - persistence_non_breeding = calc_persistence_value(current_AOH_non_breeding, historic_AOH_non_breeding, z_exponent_func_float) - - old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5) - print(old_persistence) - - new_p_breeding = process_delta_p(current_breeding, scenario_breeding, current_AOH_breeding, historic_AOH_breeding) - new_p_non_breeding = 
process_delta_p(current_non_breeding, scenario_non_breeding, current_AOH_non_breeding, historic_AOH_non_breeding) - - new_p_layer = (new_p_breeding ** 0.5) * (new_p_non_breeding ** 0.5) - - delta_p_layer = new_p_layer - ConstantLayer(old_persistence) - - output = RasterLayer.empty_raster_layer_like(new_p_breeding, filename=args['output_path']) - delta_p_layer.save(output) - - print(delta_p_layer.sum()) \ No newline at end of file From 1e607db83ba32996edc0af8f59e17768435eec79 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 31 Oct 2024 10:08:52 +0000 Subject: [PATCH 26/36] Remove old IUCN importer code --- IUCN-importer/README.md | 27 ----- IUCN-importer/iucn_gpkg_creator.py | 174 ----------------------------- 2 files changed, 201 deletions(-) delete mode 100644 IUCN-importer/README.md delete mode 100644 IUCN-importer/iucn_gpkg_creator.py diff --git a/IUCN-importer/README.md b/IUCN-importer/README.md deleted file mode 100644 index d257546..0000000 --- a/IUCN-importer/README.md +++ /dev/null @@ -1,27 +0,0 @@ - -## Overview - -This program to collates IUCN Red List data to help researchers calculate the likelihood of whether a species will become extinct or remain extant. It processes the habitats, elevation limits, and range polygons for entire classes of species and aggregates them into a single geo-package. This geo-package can be used to calculate and map the AOH for different species individually. Mapping the AOH of species is the first step in determining its persistence metric. - -### Key Instructions - -The data for the species you are interested in needs to be **downloaded, extracted and combined** - the IUCN only lets you download the csv files and the shape files separately, but they all need to be in the *same file*. - -That file name then becomes the argument that you pass in with the program when running in a cmd prompt. - -### Things to know - -* The program can take a while to run - it has to read in a shapefile each time, which is usually over 1GB in size for a class. - -* If there is no shapefile data for a species that is present in one of the csv files, that species won't appear in the final file. - -* The shapefile's column for season is coded into integers from 1-5: - 1. Resident - 2. Breeding - 3. Non-Breeding - 4. Passing - 5. Unknown - 4 and 5 are ignored and assumed to be 1 for the purposes of this program. - - - diff --git a/IUCN-importer/iucn_gpkg_creator.py b/IUCN-importer/iucn_gpkg_creator.py deleted file mode 100644 index 42524cd..0000000 --- a/IUCN-importer/iucn_gpkg_creator.py +++ /dev/null @@ -1,174 +0,0 @@ -# The shapefile's column for season is coded into integers from 1-5: -# 1 = Resident, -# 2 = Breeding, -# 3 = Non-Breeding, -# 4 = Passing, -# 5 = Unknown -# We don't care about 4 and 5, so they just get turned into 1s. -# BUT we need these to be in text format, so we can merge the csv file with the shape file on this column (as well as -# internalTaxonId) HOWEVER in the case of Amphibians, although the csv file seems to indicate that some species have -# breeding grounds as well as residency, it is actually only the HABITAT not the LOCATION that changes for breeding: -# e.g. frogs will live in the forest, but breed in the ponds, but stay in the same geographical area. Therefore if -# every species has a 1, then I will merge the tables on internaltaxonId only, and copy the shapefiles for both -# breeding and non-breeding/resident. 
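The recoding described in the comment above reduces to a small total function. Stated standalone (the name is illustrative; the precedence mirrors changing_seasons below, where non-breeding must be tested first because "BREEDING" is a substring of "NON-BREEDING"):

def season_code(raw) -> int:
    # Passage (4), unknown (5) and missing values all collapse to
    # resident (1); only explicit season codes survive.
    text = str(raw).upper()
    if "NON-BREEDING" in text or "3" in text:
        return 3
    if "BREEDING" in text or "2" in text:
        return 2
    return 1

assert [season_code(x) for x in (1, 2, 3, 4, 5, None)] == [1, 2, 3, 1, 1, 1]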
- -# Imports -import argparse -import glob -import json -import os.path -from typing import Tuple - -import pandas as pd -import geopandas as gpd -from shapely.geometry import Polygon, MultiPolygon - -PYDEVD_WARN_SLOW_RESOLVE_TIMEOUT = 10 - -# This function opens up and reads the files necessary to make the geopackage from the specified folder -def reading_files(folder: str) -> Tuple[pd.DataFrame, pd.DataFrame, gpd.GeoDataFrame]: - print("Reading csv files...") - habitats = pd.read_csv(os.path.join(folder, "habitats.csv")) - allotherfields = pd.read_csv(os.path.join(folder, "all_other_fields.csv")) - print("Reading shape file...") - shapename = ''.join(glob.glob(os.path.join(folder, '*.shp'))) - geo = gpd.read_file(shapename) - return habitats, allotherfields, geo - -# The shapefile's column for season is coded into integers from 1-5: -# 1 = Resident, 2 = Breeding, 3 = Non-Breeding, 4 = Passing, 5 = Unknown -# The habitats.csv column for season is the string version of the above coding. -# Any season that is passing, unknown, or simply doesn't have a value, is marked as resident -def changing_seasons(seasons: pd.Series) -> pd.Series: - if isinstance(seasons, str): - seasons.fillna("Resident", inplace = True) - season_array = [] - for season in seasons: - if ("NON-BREEDING" in str(season).upper()) or ('3' in str(season)): - season_array.append(3) - elif ("BREEDING" in str(season).upper()) or ('2' in str(season)): - season_array.append(2) - else: - season_array.append(1) - return pd.Series(season_array) - -#This function extracts, fills and replaces key pandas Series -#This function also creates temporary pandas DataFrames for manipulation -def extracting_series( - habitats: pd.DataFrame, - allotherfields: pd.DataFrame, - geo: gpd.GeoDataFrame -) -> Tuple[pd.DataFrame, pd.DataFrame, gpd.GeoDataFrame]: - print("Extracting key data...") - habitats['majorImportance'].fillna("Yes", inplace = True) - - temp_season = changing_seasons(habitats['season']) - habitats = habitats.drop(['season'], axis = 1) - habitats = habitats.assign(season = temp_season) - - temp = pd.DataFrame(data = pd.Series(allotherfields['internalTaxonId'])) - temp = temp.assign(ElevationLower = pd.Series(allotherfields['ElevationLower.limit']).fillna(-500.0), - ElevationUpper = pd.Series(allotherfields['ElevationUpper.limit']).fillna(9000.0)) - - notfinal = gpd.GeoDataFrame(data = geo['geometry']) - # We've seen both CAPS and noncaps column names, so here we just force everything to CAPS for consistency - geo = geo.rename(columns={ - 'id_no': 'ID_NO', - 'presence': 'PRESENCE', - 'origin': 'ORIGIN', - 'seasonal': 'SEASONAL', - }) - notfinal = notfinal.assign(internalTaxonId = pd.Series(geo['ID_NO']), Presence = pd.Series(geo['PRESENCE']), - Origin = pd.Series(geo['ORIGIN']), season = changing_seasons(pd.Series(geo['SEASONAL']))) - - return habitats, temp, notfinal - -# This function aggregates the data from the two csv files together -def habitats_sort(habitats: pd.DataFrame, temp: pd.DataFrame) -> pd.DataFrame: - print("Grouping files...") - habitats = habitats.groupby(['internalTaxonId', 'season']).agg({ - 'code': lambda x: json.dumps(x.tolist()), - 'majorImportance': lambda x: json.dumps(x.tolist()), - 'suitability': lambda x: json.dumps(x.tolist()) - }).reset_index() - habitats = habitats.merge(temp, how='left', on='internalTaxonId') - return habitats - -# This function takes the 'geometry' row and if the row has more than one polygon or -# multipolygon in it, combines them together to make a new multipolygon -def 
to_polygons(geometries): - for geometry in geometries: - if isinstance(geometry, Polygon): - yield geometry - elif isinstance(geometry, MultiPolygon): - yield from geometry.geoms - else: - raise TypeError(f"Unexpected type: {type(geometry)}") - -# This function aggregates the GeoDataFrame, and then merges it with the data from the csv files -def shape_sort(notfinal: gpd.GeoDataFrame, habitats: pd.DataFrame) -> gpd.GeoDataFrame: - #print(notfinal) #Debugging - print("Combining files...") - notfinal = (notfinal.groupby(['internalTaxonId', 'season']).agg({ - 'Presence': lambda x: json.dumps(x.tolist()), - 'Origin': lambda x: json.dumps(x.tolist()), - 'geometry': lambda x: MultiPolygon(to_polygons(x)) - })).reset_index() - notfinal = notfinal.merge(habitats, how='left', on=['internalTaxonId', 'season']) - return notfinal - -# This function converts the GeoDataFrame into a GeoPackage -def to_file(notfinal: gpd.GeoDataFrame, output_path: str) -> None: - print("Building file...") - # When the non-geometry related series are added, final becomes a DataFrame - - # so it has to be turned back into a GeoDataFrame - final = gpd.GeoDataFrame(notfinal, geometry = 'geometry') - final.to_file(output_path, driver = 'GPKG', index = None) - print(f"File {output_path} created") - -# This function records the IUCN Red List Taxonomy ID for the species that were -# not in the final geopackage -def unrecorded_data(habitats: pd.DataFrame, notfinal: gpd.GeoDataFrame): - taxid = pd.DataFrame(habitats['internalTaxonId']) - csv_set = set() - for _, row1 in taxid.iterrows(): - csv_set.add(row1['internalTaxonId']) - nopair = set() - inopair = 0 - for _, row2 in notfinal.iterrows(): - if row2['internalTaxonId'] in csv_set: - continue - nopair.add(row2['internalTaxonId']) - inopair = inopair+1 - if inopair > 0: - print("There was not enough data for: ") - for x in nopair: - print(x) - -def main(): - parser = argparse.ArgumentParser(description="Process IUCN raw data to gpkg file.") - parser.add_argument( - '--raw', - type=str, - help='Directory containing raw CSV and shape files from IUCN', - required=True, - dest='input_directory_path', - ) - parser.add_argument( - '--output', - type=str, - help='Name of result package file', - required=True, - dest='output_path', - ) - args = parser.parse_args() - - habitats, allotherfields, geo = reading_files(args.input_directory_path) - habitats, temp, notfinal = extracting_series(habitats, allotherfields, geo) - habitats = habitats_sort(habitats, temp) - notfinal = shape_sort(notfinal, habitats) - to_file(notfinal, args.output_path) - #unrecorded_data(habitats, notfinal) - -if __name__ == "__main__": - main() From e332c0446906b9d2ade255d80251997b00899cb5 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 1 Nov 2024 13:49:01 +0000 Subject: [PATCH 27/36] Add script to let us generate analysis of maps --- utils/regression_plot.py | 80 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 utils/regression_plot.py diff --git a/utils/regression_plot.py b/utils/regression_plot.py new file mode 100644 index 0000000..9ddd8b8 --- /dev/null +++ b/utils/regression_plot.py @@ -0,0 +1,80 @@ +import argparse +import functools +import operator +import os +import random +import sys +from multiprocessing import Pool, cpu_count + +import matplotlib.pyplot as plt +import numpy as np +from yirgacheffe.layers import RasterLayer + +def filter(chunks): + a_chunk, b_chunk = chunks + res = [] + for a, b in zip(a_chunk, b_chunk): + if np.isnan(a) or np.isnan(b): + 
continue + if a == 0.0 and b == 0.0: + continue + res.append((a, b)) + return res + +def regression_plot( + a_path: str, + b_path: str, +) -> None: + with RasterLayer.layer_from_file(a_path) as a_layer: + with RasterLayer.layer_from_file(b_path) as b_layer: + if a_layer.pixel_scale != b_layer.pixel_scale: + sys.exit("GeoTIFFs have different pixel scale") + if a_layer.area != b_layer.area: + sys.exit("GeoTIFFs have different spatial coordinates") + if a_layer.window != b_layer.window: + sys.exit("GeoTIFFs have different pixel dimensions") + + a_pixels = a_layer.read_array(0, 0, a_layer.window.xsize, a_layer.window.ysize) + b_pixels = b_layer.read_array(0, 0, b_layer.window.xsize, b_layer.window.ysize) + + with Pool(processes=cpu_count() // 2) as pool: + filtered_chunk_pairs = pool.map(filter, zip(a_pixels, b_pixels)) + filtered_pairs = functools.reduce(operator.iconcat, filtered_chunk_pairs, []) + sampled_pairs = random.sample(filtered_pairs, len(filtered_pairs) // 10) + a_filtered, b_filtered = zip(*sampled_pairs) + + # m, b = np.polyfit(a_filtered, b_filtered, 1) + + fig, ax = plt.subplots() + ax.scatter(x=a_filtered, y=b_filtered, marker=",") + # ax.plot(a_data, m * a_data + b) + plt.xlabel(os.path.basename(a_path)) + plt.ylabel(os.path.basename(b_path)) + plt.savefig("test.png") + +def main() -> None: + parser = argparse.ArgumentParser(description="Generates a scatter plot comparing two GeoTIFFs.") + parser.add_argument( + "--a", + type=str, + required=True, + dest="a", + help="First GeoTIFF" + ) + parser.add_argument( + "--b", + type=str, + required=True, + dest="b", + help="Second GeoTIFF" + ) + args = parser.parse_args() + + regression_plot( + args.a, + args.b, + ) + +if __name__ == "__main__": + main() + From 677ce81d98916d480d05afaf98adaf39832ecef2 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 6 Nov 2024 13:05:25 +0000 Subject: [PATCH 28/36] Add analysis scripts to master run script --- scripts/run.sh | 19 ++++++++++++++++--- utils/regression_plot.py | 15 +++++++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/scripts/run.sh b/scripts/run.sh index 60d8aa2..4c52c0d 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -20,8 +20,7 @@ if [ -z "${VIRTUAL_ENV}" ]; then exit 1 fi -# declare -a CURVES=("0.1" "0.25" "0.5" "1.0" "gompertz") -declare -a CURVES=("1.0" "gompertz") +declare -a CURVES=("0.1" "0.25" "0.5" "1.0" "gompertz") # Get habitat layer and prepare for use reclaimer zenodo --zenodo_id 4058819 \ @@ -118,4 +117,18 @@ do python3 ./deltap/delta_p_scaled_area.py --input ${DATADIR}/deltap_sum/arable/${CURVE}/ \ --diffmap ${DATADIR}/habitat/arable_diff_area.tif \ --output ${DATADIR}/deltap_final/scaled_arable_${CURVE}.tif -done \ No newline at end of file +done + +for CURVE in "${CURVES[@]}" +do + if [ "${CURVE}" == "0.25" ]; then + continue + fi + python3 ./utils/regression_plot.py --a ${DATADIR}/deltap_final/summed_scaled_arable_${CURVE}.tif \ + --b ${DATADIR}/deltap_final/summed_scaled_arable_0.25.tif \ + --output {$DATADIR}/analysis/arable_0.25_vs_${CURVE}.png + + python3 ./utils/regression_plot.py --a ${DATADIR}/deltap_final/summed_scaled_restore_${CURVE}.tif \ + --b ${DATADIR}/deltap_final/summed_scaled_restore_0.25.tif \ + --output {$DATADIR}/analysis/restore_0.25_vs_${CURVE}.png +done diff --git a/utils/regression_plot.py b/utils/regression_plot.py index 9ddd8b8..819654e 100644 --- a/utils/regression_plot.py +++ b/utils/regression_plot.py @@ -24,7 +24,11 @@ def filter(chunks): def regression_plot( a_path: str, b_path: str, + output_path: str, 
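(One caveat on the run.sh hunk above: the two --output paths use {$DATADIR}, which bash expands to a literal '{' followed by the variable's value; ${DATADIR} is presumably what was intended.) The plotting script itself thins its input deliberately: NaN pairs and pairs that are zero in both rasters are dropped, then a 10% random sample keeps matplotlib workable at global-raster sizes. The selection rule as a standalone sketch, with hypothetical names:

import random
import numpy as np

def sample_pairs(a, b, fraction=0.1):
    # Drop NaNs and cells empty in both rasters, then subsample.
    keep = ~(np.isnan(a) | np.isnan(b)) & ~((a == 0.0) & (b == 0.0))
    pairs = list(zip(a[keep], b[keep]))
    return random.sample(pairs, int(len(pairs) * fraction))

a = np.array([1.0, 0.0, np.nan, 2.0])
b = np.array([0.5, 0.0, 1.0, 0.0])
print(sample_pairs(a, b, fraction=1.0))
# the (1.0, 0.5) and (2.0, 0.0) pairs survive, in random order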
) -> None: + output_dir, _ = os.path.split(output_path) + os.makedirs(output_dir, exist_ok=True) + with RasterLayer.layer_from_file(a_path) as a_layer: with RasterLayer.layer_from_file(b_path) as b_layer: if a_layer.pixel_scale != b_layer.pixel_scale: @@ -47,10 +51,9 @@ def regression_plot( fig, ax = plt.subplots() ax.scatter(x=a_filtered, y=b_filtered, marker=",") - # ax.plot(a_data, m * a_data + b) plt.xlabel(os.path.basename(a_path)) plt.ylabel(os.path.basename(b_path)) - plt.savefig("test.png") + plt.savefig(output_path) def main() -> None: parser = argparse.ArgumentParser(description="Generates a scatter plot comparing two GeoTIFFs.") @@ -68,11 +71,19 @@ def main() -> None: dest="b", help="Second GeoTIFF" ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output", + help="Destination png file for results." + ) args = parser.parse_args() regression_plot( args.a, args.b, + args.output, ) if __name__ == "__main__": From 3fc914a24749011e0598fbaa1daf5eeb24aae8c9 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 7 Nov 2024 08:54:37 +0000 Subject: [PATCH 29/36] Add species richness predictor. --- predictors/species-richness.py | 211 +++++++++++++++++++++++++++++++++ scripts/run.sh | 2 + 2 files changed, 213 insertions(+) create mode 100644 predictors/species-richness.py diff --git a/predictors/species-richness.py b/predictors/species-richness.py new file mode 100644 index 0000000..0716362 --- /dev/null +++ b/predictors/species-richness.py @@ -0,0 +1,211 @@ +import argparse +import os +import sys +import tempfile +import time +from glob import glob +from multiprocessing import Manager, Process, Queue, cpu_count +from typing import Set + +import numpy as np +from osgeo import gdal +from yirgacheffe.layers import RasterLayer + +def stage_1_worker( + filename: str, + result_dir: str, + input_queue: Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + raster_paths = input_queue.get() + if raster_paths is None: + break + + rasters = [RasterLayer.layer_from_file(x) for x in raster_paths] + + if len(rasters) > 1: + union = RasterLayer.find_union(rasters) + for r in rasters: + r.set_window_for_union(union) + calc = rasters[0].numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) + for r in rasters[:1]: + calc = calc | r.numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) + + partial = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Int16) + calc.save(partial) + else: + partial = rasters[0].numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) + + if merged_result is None: + if len(rasters) > 1: + merged_result = partial + else: + merged_result = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Int16) + partial.save(merged_result) + else: + merged_result.reset_window() + if len(rasters) > 1: + union = RasterLayer.find_union([merged_result, partial]) + partial.set_window_for_union(union) + else: + union = RasterLayer.find_union([merged_result, rasters[0]]) + rasters[0].set_window_for_union(union) + partial = rasters[0].numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) + merged_result.set_window_for_union(union) + + + merged = partial + merged_result + temp = RasterLayer.empty_raster_layer_like(merged_result) + merged.save(temp) + merged_result = temp + + if merged_result is not None: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def stage_2_worker( + filename: str, + result_dir: str, + input_queue: 
Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + path = input_queue.get() + if path is None: + break + + with RasterLayer.layer_from_file(path) as partial_raster: + if merged_result is None: + merged_result = RasterLayer.empty_raster_layer_like(partial_raster) + cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + cleaned_raster.save(merged_result) + else: + merged_result.reset_window() + + union = RasterLayer.find_union([merged_result, partial_raster]) + merged_result.set_window_for_union(union) + partial_raster.set_window_for_union(union) + + calc = merged_result + (partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0))) + temp = RasterLayer.empty_raster_layer_like(merged_result) + calc.save(temp) + merged_result = temp + + if merged_result: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def species_richness( + aohs_dir: str, + output_path: str, + processes_count: int +) -> None: + output_dir, filename = os.path.split(output_path) + os.makedirs(output_dir, exist_ok=True) + + aohs = glob("**/*.tif", root_dir=aohs_dir) + print(f"We fould {len(aohs)} AoH rasters") + + species_rasters = {} + for raster_path in aohs: + speciesid = os.path.splitext(os.path.basename(raster_path))[0] + full_path = os.path.join(aohs_dir, raster_path) + try: + species_rasters[speciesid].add(full_path) + except KeyError: + species_rasters[speciesid] = set([full_path]) + + with tempfile.TemporaryDirectory() as tempdir: + with Manager() as manager: + source_queue = manager.Queue() + + workers = [Process(target=stage_1_worker, args=( + f"{index}.tif", + tempdir, + source_queue + )) for index in range(processes_count)] + for worker_process in workers: + worker_process.start() + + for species in species_rasters: + source_queue.put(species_rasters[species]) + for _ in range(len(workers)): + source_queue.put(None) + + processes = workers + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + + # here we should have now a set of images in tempdir to merge + single_worker = Process(target=stage_2_worker, args=( + filename, + output_dir, + source_queue + )) + single_worker.start() + nextfiles = [os.path.join(tempdir, x) for x in glob("*.tif", root_dir=tempdir)] + for file in nextfiles: + source_queue.put(file) + source_queue.put(None) + + processes = [single_worker] + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + +def main() -> None: + parser = argparse.ArgumentParser(description="Calculate species richness") + parser.add_argument( + "--aohs_folder", + type=str, + required=True, + dest="aohs", + help="Folder containing set of AoHs" + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output", + help="Destination GeoTIFF file for results." + ) + parser.add_argument( + "-j", + type=int, + required=False, + default=round(cpu_count() / 2), + dest="processes_count", + help="Number of concurrent threads to use." 
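Behind the queue management, the two worker stages compose a simple calculation: stage 1 ORs each species' seasonal AoH rasters into a single 0/1 presence mask, and stage 2 sums those masks, so each cell ends up holding a count of species present. In plain array terms (a sketch of the semantics, not of the worker code above):

import numpy as np

def presence(seasonal_aohs):
    # Stage 1: present wherever any season has non-zero AoH.
    masks = [np.where(aoh != 0.0, 1, 0) for aoh in seasonal_aohs]
    return np.maximum.reduce(masks)

def richness(per_species_seasonal):
    # Stage 2: richness is the per-cell count of present species.
    return sum(presence(s) for s in per_species_seasonal)

species_a = [np.array([0.0, 2.5]), np.array([1.0, 0.0])]  # two seasons
species_b = [np.array([0.0, 3.0])]                        # one season
print(richness([species_a, species_b]))  # [1 2]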
+ ) + args = parser.parse_args() + + species_richness( + args.aohs, + args.output, + args.processes_count + ) + +if __name__ == "__main__": + main() diff --git a/scripts/run.sh b/scripts/run.sh index 4c52c0d..69b8a3a 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -132,3 +132,5 @@ do --b ${DATADIR}/deltap_final/summed_scaled_restore_0.25.tif \ --output {$DATADIR}/analysis/restore_0.25_vs_${CURVE}.png done + +python3 ./predictors/species-richness.py --aohs_folder ${DATADIR}/aohs/current/ --output ${DATADIR}/predictors/species_richness.tif From 27b66a9cd0b1ebcf20797d24b7bebb715f2bb040 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 7 Nov 2024 09:05:39 +0000 Subject: [PATCH 30/36] Update aoh-calculator --- aoh-calculator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aoh-calculator b/aoh-calculator index de5db6f..b6574cc 160000 --- a/aoh-calculator +++ b/aoh-calculator @@ -1 +1 @@ -Subproject commit de5db6fbe4b4cbcebf4ae04df9817fac680b700c +Subproject commit b6574ccc027e3f81db011f793e334f3bbc0f2776 From b2ddf7d9f272a6d2ee207c5901107f9321a43048 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 7 Nov 2024 16:22:50 +0000 Subject: [PATCH 31/36] Only exclude species where caves are major importance --- prepare-species/extract_species_psql.py | 30 ++++++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py index 8795ead..3710545 100644 --- a/prepare-species/extract_species_psql.py +++ b/prepare-species/extract_species_psql.py @@ -53,6 +53,7 @@ HABITATS_STATEMENT = """ SELECT assessment_habitats.supplementary_fields->>'season', + assessment_habitats.supplementary_fields->>'majorImportance', STRING_AGG(habitat_lookup.code, '|') AS full_habitat_code, STRING_AGG(system_lookup.description->>'en', '|') AS systems FROM @@ -68,7 +69,7 @@ assessment_habitats.supplementary_fields->>'suitability' IS NULL OR assessment_habitats.supplementary_fields->>'suitability' IN ('Suitable', 'Unknown') ) -GROUP BY (assessment_habitats.supplementary_fields->>'season') +GROUP BY (assessment_habitats.supplementary_fields->>'season', assessment_habitats.supplementary_fields->>'majorImportance') """ GEOMETRY_STATEMENT = """ @@ -147,7 +148,7 @@ def process_row( # null habitats = {} - for season, habitat_values, systems in raw_habitats: + for season, major_importance, habitat_values, systems in raw_habitats: if season in ['passage', 'Passage']: continue @@ -173,7 +174,7 @@ def process_row( habitat_set = set([x for x in habitat_values.split('|')]) if len(habitat_set) == 0: continue - if any([x.startswith('7') for x in habitat_set]): + if any([x.startswith('7') for x in habitat_set]) and major_importance == 'Yes': logger.debug("Dropping %s: Habitat 7 in habitat list", id_no) return @@ -213,7 +214,13 @@ def process_row( if seasons == {1}: # Resident only gdf = gpd.GeoDataFrame( - [[id_no, SEASON_NAME[1], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats[1])), geometries[1]]], + [[ + id_no, + SEASON_NAME[1], + int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, + '|'.join(list(habitats[1])), + geometries[1] + ]], columns=COLUMNS, crs='epsg:4326' ) @@ -258,14 +265,25 @@ def process_row( return gdf = gpd.GeoDataFrame( - [[id_no, SEASON_NAME[2], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, 
'|'.join(list(habitats_breeding)), geometry_breeding]], + [[ + id_no, + SEASON_NAME[2], + int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, + '|'.join(list(habitats_breeding)), + geometry_breeding + ]], columns=COLUMNS, crs='epsg:4326' ) tidy_reproject_save(gdf, output_directory_path) gdf = gpd.GeoDataFrame( - [[id_no, SEASON_NAME[3], int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, '|'.join(list(habitats_non_breeding)), geometry_non_breeding]], + [[ + id_no, SEASON_NAME[3], + int(elevation_lower) if elevation_lower else None, int(elevation_upper) if elevation_upper else None, + '|'.join(list(habitats_non_breeding)), + geometry_non_breeding + ]], columns=COLUMNS, crs='epsg:4326', ) From 80cfce7f91dd50cd26658a489a1dc541884fe5b4 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 8 Nov 2024 12:44:19 +0000 Subject: [PATCH 32/36] Script to calculate endemism --- predictors/endemism.py | 261 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 predictors/endemism.py diff --git a/predictors/endemism.py b/predictors/endemism.py new file mode 100644 index 0000000..297f775 --- /dev/null +++ b/predictors/endemism.py @@ -0,0 +1,261 @@ +# Endemism is the geometric mean of the proportion of how much each cell contributes to a species total AoH. +# Uses the trick from https://stackoverflow.com/questions/43099542/python-easy-way-to-do-geometric-mean-in-python +# for calculating the geometric mean with less risk of overflow + +import argparse +import os +import sys +import tempfile +import time +from glob import glob +from multiprocessing import Manager, Process, Queue, cpu_count +from typing import Set + +import numpy as np +from osgeo import gdal +from yirgacheffe.layers import RasterLayer + +def stage_1_worker( + filename: str, + result_dir: str, + input_queue: Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + raster_paths = input_queue.get() + if raster_paths is None: + break + + rasters = [RasterLayer.layer_from_file(x) for x in raster_paths] + + match len(rasters): + case 2: + union = RasterLayer.find_union(rasters) + for r in rasters: + r.set_window_for_union(union) + + aoh1 = rasters[0].sum() + if aoh1 > 0.0: + season1 = rasters[0].numpy_apply(lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh1))) + else: + season1 = None + aoh2 = rasters[1].sum() + if aoh2 > 0.0: + season2 = rasters[1].numpy_apply(lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh2))) + else: + season2 = None + + match season1, season2: + case None, None: + continue + case a, None: + combined = a + case None, b: + combined = b + case _, _: + combined = season1.numpy_apply(lambda a, b: np.where(a > b, a, b), season2) + + partial = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Float64) + combined.save(partial) + case 1: + aoh = rasters[0].sum() + if aoh > 0.0: + partial = rasters[0].numpy_apply(lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh))) + else: + continue + case _: + raise ValueError("too many seasons") + + if merged_result is None: + if len(rasters) > 1: + merged_result = partial + else: + merged_result = RasterLayer.empty_raster_layer_like(rasters[0], datatype=gdal.GDT_Float64) + partial.save(merged_result) + else: + merged_result.reset_window() + if len(rasters) > 1: + union = RasterLayer.find_union([merged_result, partial]) + 
partial.set_window_for_union(union) + else: + union = RasterLayer.find_union([merged_result, rasters[0]]) + rasters[0].set_window_for_union(union) + merged_result.set_window_for_union(union) + + merged = partial + merged_result + temp = RasterLayer.empty_raster_layer_like(merged_result) + merged.save(temp) + merged_result = temp + + if merged_result is not None: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def stage_2_worker( + filename: str, + result_dir: str, + input_queue: Queue, +) -> None: + output_tif = os.path.join(result_dir, filename) + + merged_result = None + + while True: + path = input_queue.get() + if path is None: + break + + with RasterLayer.layer_from_file(path) as partial_raster: + if merged_result is None: + merged_result = RasterLayer.empty_raster_layer_like(partial_raster) + cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + cleaned_raster.save(merged_result) + else: + merged_result.reset_window() + + union = RasterLayer.find_union([merged_result, partial_raster]) + merged_result.set_window_for_union(union) + partial_raster.set_window_for_union(union) + + calc = merged_result + partial_raster + temp = RasterLayer.empty_raster_layer_like(merged_result) + calc.save(temp) + merged_result = temp + + if merged_result: + final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) + merged_result.save(final) + +def species_richness( + aohs_dir: str, + species_richness_path: str, + output_path: str, + processes_count: int +) -> None: + output_dir, _ = os.path.split(output_path) + os.makedirs(output_dir, exist_ok=True) + + aohs = glob("**/*.tif", root_dir=aohs_dir) + print(f"We fould {len(aohs)} AoH rasters") + + species_rasters = {} + for raster_path in aohs: + speciesid = os.path.splitext(os.path.basename(raster_path))[0] + full_path = os.path.join(aohs_dir, raster_path) + try: + species_rasters[speciesid].add(full_path) + except KeyError: + species_rasters[speciesid] = set([full_path]) + + with tempfile.TemporaryDirectory() as tempdir: + with Manager() as manager: + source_queue = manager.Queue() + + workers = [Process(target=stage_1_worker, args=( + f"{index}.tif", + tempdir, + source_queue + )) for index in range(processes_count)] + for worker_process in workers: + worker_process.start() + + for species in species_rasters: + source_queue.put(species_rasters[species]) + for _ in range(len(workers)): + source_queue.put(None) + + processes = workers + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + + # here we should have now a set of images in tempdir to merge + single_worker = Process(target=stage_2_worker, args=( + "summed_proportion.tif", + output_dir, + source_queue + )) + single_worker.start() + nextfiles = [os.path.join(tempdir, x) for x in glob("*.tif", root_dir=tempdir)] + for file in nextfiles: + source_queue.put(file) + source_queue.put(None) + + processes = [single_worker] + while processes: + candidates = [x for x in processes if not x.is_alive()] + for candidate in candidates: + candidate.join() + if candidate.exitcode: + for victim in processes: + victim.kill() + sys.exit(candidate.exitcode) + processes.remove(candidate) + time.sleep(1) + + with RasterLayer.layer_from_file(species_richness_path) as 
species_richness: + with RasterLayer.layer_from_file(os.path.join(output_dir, "summed_proportion.tif")) as summed_proportion: + + intersection = RasterLayer.find_intersection([summed_proportion, species_richness]) + summed_proportion.set_window_for_intersection(intersection) + species_richness.set_window_for_intersection(intersection) + + cleaned_species_richness = species_richness.numpy_apply(lambda a: np.where(a > 0, a, np.nan)) + + with RasterLayer.empty_raster_layer_like(summed_proportion, filename=output_path, nodata=np.nan) as result: + calc = summed_proportion.numpy_apply(lambda a, b: np.exp(a / b), cleaned_species_richness) + calc.parallel_save(result) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Calculate species richness") + parser.add_argument( + "--aohs_folder", + type=str, + required=True, + dest="aohs", + help="Folder containing set of AoHs" + ) + parser.add_argument( + "--species_richness", + type=str, + required=True, + dest="species_richness", + help="GeoTIFF containing species richness" + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output", + help="Destination GeoTIFF file for results." + ) + parser.add_argument( + "-j", + type=int, + required=False, + default=round(cpu_count() / 2), + dest="processes_count", + help="Number of concurrent threads to use." + ) + args = parser.parse_args() + + species_richness( + args.aohs, + args.species_richness, + args.output, + args.processes_count + ) + +if __name__ == "__main__": + main() From 26557782b055c6e332b515447e7bdbdb12f84a32 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 8 Nov 2024 12:47:41 +0000 Subject: [PATCH 33/36] Fix linter warnings --- .pylintrc | 2 +- deltap/delta_p_scaled_area.py | 19 ++++- predictors/species-richness.py | 1 - prepare-layers/make_arable_map.py | 4 +- prepare-layers/make_current_map.py | 7 +- prepare-layers/make_diff_map.py | 2 +- prepare-layers/make_restore_map.py | 2 +- tests/test_calculate.py | 114 ++++++++++++++--------------- utils/persistencegenerator.py | 1 - utils/raster_sum.py | 4 +- utils/regression_plot.py | 6 +- utils/speciesgenerator.py | 1 - 12 files changed, 86 insertions(+), 77 deletions(-) diff --git a/.pylintrc b/.pylintrc index ac01bde..59327c8 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,4 +1,4 @@ [FORMAT] max-line-length=120 -disable=C0114, C0115, C0116, W0511, R0801, R0902, R0912, R0913, R0914, R0915, R1705, W0231 \ No newline at end of file +disable=C0104, C0114, C0115, C0116, W0511, R0801, R0902, R0912, R0913, R0914, R0915, R0917, R1705, W0231, \ No newline at end of file diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py index a308447..bc5703f 100644 --- a/deltap/delta_p_scaled_area.py +++ b/deltap/delta_p_scaled_area.py @@ -34,12 +34,20 @@ def delta_p_scaled_area( area_restore_filter = area_restore.numpy_apply(lambda c: np.where(c < SCALE, float('nan'), c)) / SCALE per_taxa_path = os.path.join(dirname, f"per_taxa_{basename}") - with RasterLayer.empty_raster_layer_like(area_restore, filename=per_taxa_path, nodata=float('nan'), bands=len(per_taxa)) as result: + with RasterLayer.empty_raster_layer_like( + area_restore, + filename=per_taxa_path, + nodata=float('nan'), + bands=len(per_taxa) + ) as result: for idx in range(len(per_taxa)): inlayer = per_taxa[idx] _, name = os.path.split(inlayer.name) result._dataset.GetRasterBand(idx+1).SetDescription(name[:-4]) - scaled_filtered_layer = inlayer.numpy_apply(lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), area_restore_filter) 
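The endemism script earlier in this patch series relies on a standard overflow dodge that is worth spelling out: the geometric mean of per-cell proportions is taken as the exponential of the mean of logs, because a direct product over thousands of tiny proportions underflows float64 to zero. A minimal sketch (hypothetical names):

import numpy as np

def geometric_mean(proportions):
    # exp(mean(log p)) equals (p1 * ... * pn) ** (1/n), but the
    # log-space sum stays comfortably inside float64 range.
    return np.exp(np.log(proportions).sum() / len(proportions))

p = np.array([1e-200, 1e-150, 1e-100])
print(np.prod(p))         # 0.0 -- the direct product underflows
print(geometric_mean(p))  # 1e-150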
+ scaled_filtered_layer = inlayer.numpy_apply( + lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), + area_restore_filter + ) scaled_filtered_layer.parallel_save(result, band=idx + 1) summed_output_path = os.path.join(dirname, f"summed_{basename}") @@ -47,7 +55,10 @@ def delta_p_scaled_area( summed_layer = per_taxa[0] for layer in per_taxa[1:]: summed_layer = summed_layer + layer - scaled_filtered_layer = summed_layer.numpy_apply(lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), area_restore_filter) + scaled_filtered_layer = summed_layer.numpy_apply( + lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), + area_restore_filter + ) scaled_filtered_layer.parallel_save(result) def main() -> None: @@ -82,4 +93,4 @@ def main() -> None: ) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/predictors/species-richness.py b/predictors/species-richness.py index 0716362..5dadd02 100644 --- a/predictors/species-richness.py +++ b/predictors/species-richness.py @@ -54,7 +54,6 @@ def stage_1_worker( else: union = RasterLayer.find_union([merged_result, rasters[0]]) rasters[0].set_window_for_union(union) - partial = rasters[0].numpy_apply(lambda chunk: np.where(chunk == 0.0, 0, 1)) merged_result.set_window_for_union(union) diff --git a/prepare-layers/make_arable_map.py b/prepare-layers/make_arable_map.py index 1288252..7236463 100644 --- a/prepare-layers/make_arable_map.py +++ b/prepare-layers/make_arable_map.py @@ -1,5 +1,5 @@ -import argparse -from typing import Dict, List, Optional +import argparse +from typing import Optional import numpy as np from alive_progress import alive_bar diff --git a/prepare-layers/make_current_map.py b/prepare-layers/make_current_map.py index c4e1f7c..684bf37 100644 --- a/prepare-layers/make_current_map.py +++ b/prepare-layers/make_current_map.py @@ -3,14 +3,13 @@ from typing import Dict, Optional from multiprocessing import Pool, cpu_count, set_start_method -import numpy as np import pandas as pd from alive_progress import alive_bar from yirgacheffe.layers import RasterLayer -# From Eyres et al: The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped only at IUCN level 1, -# so to estimate species’ proportion of original AOH now remaining we could only use natural habitats mapped at level 1 and artificial -# habitats at level 2. +# From Eyres et al: The current layer maps IUCN level 1 and 2 habitats, but habitats in the PNV layer are mapped +# only at IUCN level 1, so to estimate species’ proportion of original AOH now remaining we could only use natural +# habitats mapped at level 1 and artificial habitats at level 2. 
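Concretely, the constraint in that comment means the current habitat map has to be recoded so natural classes fall back to their level 1 parent while artificial classes keep their level 2 detail before comparison against the PNV layer. A hypothetical sketch of that crosswalk (IUCN_CODE_ARTIFICAL is the script's own constant, defined just below; the function name is illustrative):

IUCN_CODE_ARTIFICAL = ["14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6"]

def comparable_code(code: str) -> str:
    # Artificial habitats keep level 2 detail; natural habitats are
    # truncated to level 1 to match the PNV layer's coding.
    return code if code in IUCN_CODE_ARTIFICAL else code.split(".")[0]

assert comparable_code("14.3") == "14.3"
assert comparable_code("1.6") == "1"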
IUCN_CODE_ARTIFICAL = [ "14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6" ] diff --git a/prepare-layers/make_diff_map.py b/prepare-layers/make_diff_map.py index 3e716ce..26d8066 100644 --- a/prepare-layers/make_diff_map.py +++ b/prepare-layers/make_diff_map.py @@ -2,7 +2,7 @@ import os import shutil import tempfile -from typing import Dict, List, Optional +from typing import Optional from osgeo import gdal from alive_progress import alive_bar diff --git a/prepare-layers/make_restore_map.py b/prepare-layers/make_restore_map.py index dfe3cab..b26b37b 100644 --- a/prepare-layers/make_restore_map.py +++ b/prepare-layers/make_restore_map.py @@ -42,7 +42,7 @@ def make_restore_map( try: intersection = RasterLayer.find_intersection([pnv, current]) except ValueError: - print(f"Layers do not match in pixel scale or projection:\n", file=sys.stderr) + print("Layers do not match in pixel scale or projection:\n", file=sys.stderr) print(f"\t{pnv_path}: {pnv.pixel_scale}, {pnv.projection}") print(f"\t{current_path}: {current.pixel_scale}, {current.projection}") sys.exit(-1) diff --git a/tests/test_calculate.py b/tests/test_calculate.py index 11a27cf..498814f 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -10,71 +10,71 @@ import persistence class SingleValueLayer(YirgacheffeLayer): - """Mocked layer to make testing calc function easier""" - def __init__(self, value: Any, width: int, height: int): - self.value = value - area = Area( - left = -180.0, - top = 90.0, - right = 180.0, - bottom = -90.0 - ) - super().__init__(area, None, WGS_84_PROJECTION) - self._window = Window(0, 0, width, height) + """Mocked layer to make testing calc function easier""" + def __init__(self, value: Any, width: int, height: int): + self.value = value + area = Area( + left = -180.0, + top = 90.0, + right = 180.0, + bottom = -90.0 + ) + super().__init__(area, None, WGS_84_PROJECTION) + self._window = Window(0, 0, width, height) - def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: - assert (xoffset + xsize) <= self.window.xsize - assert (yoffset + ysize) <= self.window.ysize - return numpy.ones((ysize, xsize)) * self.value + def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: + assert (xoffset + xsize) <= self.window.xsize + assert (yoffset + ysize) <= self.window.ysize + return numpy.ones((ysize, xsize)) * self.value @pytest.mark.parametrize( - "habitat,elevation,range,area,habitats,elevation_range,expected_area", - [ - (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 10000.0), 4.0), - (100, 1234.0, False, 4.0, [100, 200, 300], (0.0, 10000.0), 0.0), - (100, 1234.0, True, 4.0, [200, 300], (0.0, 10000.0), 0.0), - (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 100.0), 0.0), - # (100, 1234.0, True, numpy.nan, [100, 200, 300], (0.0, 10000.0), 0.0), - ] + "habitat,elevation,range,area,habitats,elevation_range,expected_area", + [ + (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 10000.0), 4.0), + (100, 1234.0, False, 4.0, [100, 200, 300], (0.0, 10000.0), 0.0), + (100, 1234.0, True, 4.0, [200, 300], (0.0, 10000.0), 0.0), + (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 100.0), 0.0), + # (100, 1234.0, True, numpy.nan, [100, 200, 300], (0.0, 10000.0), 0.0), + ] ) def test_calculate_simple(habitat,elevation,range,area,habitats,elevation_range,expected_area): - habitat_layer = SingleValueLayer(habitat, 1, 1) - elevation_layer = SingleValueLayer(elevation, 1, 1) - range_layer = SingleValueLayer(range, 1, 1) - area_layer = SingleValueLayer(area, 1, 1) + 
habitat_layer = SingleValueLayer(habitat, 1, 1) + elevation_layer = SingleValueLayer(elevation, 1, 1) + range_layer = SingleValueLayer(range, 1, 1) + area_layer = SingleValueLayer(area, 1, 1) - persistence.YSTEP = 1 - area = persistence._calculate_cpu( - range_layer, - habitat_layer, - habitats, - elevation_layer, - elevation_range, - area_layer, - None - ) - assert area == expected_area + persistence.YSTEP = 1 + area = persistence._calculate_cpu( + range_layer, + habitat_layer, + habitats, + elevation_layer, + elevation_range, + area_layer, + None + ) + assert area == expected_area @pytest.mark.parametrize("step_size", [1, 2, 3, 9, 10, 11]) def test_calculate_step_sizes(step_size): - habitat_layer = SingleValueLayer(100, 10, 10) - elevation_layer = SingleValueLayer(1234.0, 10, 10) - range_layer = SingleValueLayer(True, 10, 10) + habitat_layer = SingleValueLayer(100, 10, 10) + elevation_layer = SingleValueLayer(1234.0, 10, 10) + range_layer = SingleValueLayer(True, 10, 10) - # we want a non uniform area to make this interesting - area_dataset = gdal.GetDriverByName('mem').Create('mem', 1, 10, 1, gdal.GDT_Float32, []) - area_dataset.SetGeoTransform([-180.0, 180.0, 0.0, 90.0, 0.0, -18.0]) - area_dataset.GetRasterBand(1).WriteArray(numpy.array([[float(x)] for x in range(1, 11)]), 0, 0) - area_layer = UniformAreaLayer(area_dataset) + # we want a non uniform area to make this interesting + area_dataset = gdal.GetDriverByName('mem').Create('mem', 1, 10, 1, gdal.GDT_Float32, []) + area_dataset.SetGeoTransform([-180.0, 180.0, 0.0, 90.0, 0.0, -18.0]) + area_dataset.GetRasterBand(1).WriteArray(numpy.array([[float(x)] for x in range(1, 11)]), 0, 0) + area_layer = UniformAreaLayer(area_dataset) - persistence.YSTEP = step_size - area = persistence._calculate_cpu( - range_layer, - habitat_layer, - [100, 200, 300], - elevation_layer, - (0.0, 10000.0), - area_layer, - None - ) - assert area == 550.0 + persistence.YSTEP = step_size + area = persistence._calculate_cpu( + range_layer, + habitat_layer, + [100, 200, 300], + elevation_layer, + (0.0, 10000.0), + area_layer, + None + ) + assert area == 550.0 diff --git a/utils/persistencegenerator.py b/utils/persistencegenerator.py index 49cd467..aa39672 100644 --- a/utils/persistencegenerator.py +++ b/utils/persistencegenerator.py @@ -2,7 +2,6 @@ import argparse import os -from typing import List, Set import pandas as pd diff --git a/utils/raster_sum.py b/utils/raster_sum.py index 0165b97..fc7579d 100644 --- a/utils/raster_sum.py +++ b/utils/raster_sum.py @@ -36,7 +36,9 @@ def worker( merged_result.set_window_for_union(union) partial_raster.set_window_for_union(union) - calc = merged_result + (partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0))) + calc = merged_result + ( + partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + ) temp = RasterLayer.empty_raster_layer_like(merged_result, datatype=gdal.GDT_Float64) calc.save(temp) merged_result = temp diff --git a/utils/regression_plot.py b/utils/regression_plot.py index 819654e..432e5cd 100644 --- a/utils/regression_plot.py +++ b/utils/regression_plot.py @@ -10,7 +10,7 @@ import numpy as np from yirgacheffe.layers import RasterLayer -def filter(chunks): +def filter_data(chunks): a_chunk, b_chunk = chunks res = [] for a, b in zip(a_chunk, b_chunk): @@ -42,14 +42,14 @@ def regression_plot( b_pixels = b_layer.read_array(0, 0, b_layer.window.xsize, b_layer.window.ysize) with Pool(processes=cpu_count() // 2) as pool: - filtered_chunk_pairs = 
pool.map(filter, zip(a_pixels, b_pixels)) + filtered_chunk_pairs = pool.map(filter_data, zip(a_pixels, b_pixels)) filtered_pairs = functools.reduce(operator.iconcat, filtered_chunk_pairs, []) sampled_pairs = random.sample(filtered_pairs, len(filtered_pairs) // 10) a_filtered, b_filtered = zip(*sampled_pairs) # m, b = np.polyfit(a_filtered, b_filtered, 1) - fig, ax = plt.subplots() + _fig, ax = plt.subplots() ax.scatter(x=a_filtered, y=b_filtered, marker=",") plt.xlabel(os.path.basename(a_path)) plt.ylabel(os.path.basename(b_path)) diff --git a/utils/speciesgenerator.py b/utils/speciesgenerator.py index 0a15d0a..24a0eb6 100644 --- a/utils/speciesgenerator.py +++ b/utils/speciesgenerator.py @@ -2,7 +2,6 @@ import argparse import os -from typing import List, Set import pandas as pd From f4562738e32a1a37eb19f5896ffe80e8313cbe64 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 8 Nov 2024 12:56:56 +0000 Subject: [PATCH 34/36] Rename species richness script --- predictors/{species-richness.py => species_richness.py} | 0 scripts/run.sh | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename predictors/{species-richness.py => species_richness.py} (100%) diff --git a/predictors/species-richness.py b/predictors/species_richness.py similarity index 100% rename from predictors/species-richness.py rename to predictors/species_richness.py diff --git a/scripts/run.sh b/scripts/run.sh index 69b8a3a..79358ce 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -133,4 +133,4 @@ do --output {$DATADIR}/analysis/restore_0.25_vs_${CURVE}.png done -python3 ./predictors/species-richness.py --aohs_folder ${DATADIR}/aohs/current/ --output ${DATADIR}/predictors/species_richness.tif +python3 ./predictors/species_richness.py --aohs_folder ${DATADIR}/aohs/current/ --output ${DATADIR}/predictors/species_richness.tif From ed558ed36c84ea8a1efd55ecdc6ea13de3c07763 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 8 Nov 2024 14:28:54 +0000 Subject: [PATCH 35/36] Fix linter warnings --- .pylintrc | 2 +- deltap/delta_p_scaled_area.py | 5 +- ..._128.py => global_code_residents_pixel.py} | 103 ++++++++----- method.md | 24 +-- predictors/endemism.py | 31 ++-- predictors/species_richness.py | 9 +- prepare-layers/make_area_map.py | 4 +- prepare-layers/make_current_map.py | 10 +- prepare-layers/make_restore_map.py | 2 +- prepare-species/cleaning.py | 48 ------ prepare-species/extract_species_psql.py | 32 ++-- prepare-species/species_prep.py | 144 ------------------ scripts/run.sh | 2 +- tests/test_calculate.py | 80 ---------- utils/regression_plot.py | 1 - 15 files changed, 132 insertions(+), 365 deletions(-) rename deltap/{global_code_residents_pixel_AE_128.py => global_code_residents_pixel.py} (75%) delete mode 100644 prepare-species/cleaning.py delete mode 100644 prepare-species/species_prep.py delete mode 100644 tests/test_calculate.py diff --git a/.pylintrc b/.pylintrc index 59327c8..56e254c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,4 +1,4 @@ [FORMAT] max-line-length=120 -disable=C0104, C0114, C0115, C0116, W0511, R0801, R0902, R0912, R0913, R0914, R0915, R0917, R1705, W0231, \ No newline at end of file +disable=C3001, C0104, C0114, C0115, C0116, W0511, R0801, R0902, R0911, R0912, R0913, R0914, R0915, R0917, R1705, W0231, \ No newline at end of file diff --git a/deltap/delta_p_scaled_area.py b/deltap/delta_p_scaled_area.py index bc5703f..e46aa4a 100644 --- a/deltap/delta_p_scaled_area.py +++ b/deltap/delta_p_scaled_area.py @@ -40,10 +40,9 @@ def delta_p_scaled_area( nodata=float('nan'), bands=len(per_taxa) ) 
as result: - for idx in range(len(per_taxa)): - inlayer = per_taxa[idx] + for idx, inlayer in enumerate(per_taxa): _, name = os.path.split(inlayer.name) - result._dataset.GetRasterBand(idx+1).SetDescription(name[:-4]) + result._dataset.GetRasterBand(idx+1).SetDescription(name[:-4]) # pylint: disable=W0212 scaled_filtered_layer = inlayer.numpy_apply( lambda il, af: np.where(af != 0, (il / af) * -1.0, float('nan')), area_restore_filter diff --git a/deltap/global_code_residents_pixel_AE_128.py b/deltap/global_code_residents_pixel.py similarity index 75% rename from deltap/global_code_residents_pixel_AE_128.py rename to deltap/global_code_residents_pixel.py index 079a83f..5677be9 100644 --- a/deltap/global_code_residents_pixel_AE_128.py +++ b/deltap/global_code_residents_pixel.py @@ -16,9 +16,9 @@ GOMPERTZ_ALPHA = 1 class Season(Enum): - RESIDENT = 1 - BREEDING = 2 - NONBREEDING = 3 + RESIDENT = 1 + BREEDING = 2 + NONBREEDING = 3 def gen_gompertz(x: float) -> float: return math.exp(-math.exp(GOMPERTZ_A + (GOMPERTZ_B * (x ** GOMPERTZ_ALPHA)))) @@ -36,22 +36,28 @@ def open_layer_as_float64(filename: str) -> RasterLayer: layer.save(layer64) return layer64 -def calc_persistence_value(current_AOH: float, historic_AOH: float, exponent_func) -> float: - sp_P = exponent_func(current_AOH / historic_AOH) - sp_P_fix = np.where(sp_P > 1, 1, sp_P) - return sp_P_fix - -def process_delta_p(current: RasterLayer, scenario: RasterLayer, current_AOH: float, historic_AOH: float, exponent_func_raster) -> RasterLayer: - # In theory we could recalc current_AOH, but given we already have it don't duplicate work +def calc_persistence_value(current_aoh: float, historic_aoh: float, exponent_func) -> float: + sp_p = exponent_func(current_aoh / historic_aoh) + sp_p_fix = np.where(sp_p > 1, 1, sp_p) + return sp_p_fix + +def process_delta_p( + current: RasterLayer, + scenario: RasterLayer, + current_aoh: float, + historic_aoh: float, + exponent_func_raster +) -> RasterLayer: + # In theory we could recalc current_aoh, but given we already have it don't duplicate work # New section added in: Calculating for rasters rather than csv's - const_layer = ConstantLayer(current_AOH) # MAKE A LAYER WITH THE SAME PROPERTIES AS CURRENT AOH RASTER BUT FILLED WITH THE CURRENT AOH - calc_1 = (const_layer - current) + scenario # FIRST CALCULATION : NEW AOH - new_AOH = RasterLayer.empty_raster_layer_like(current) - calc_1.save(new_AOH) + const_layer = ConstantLayer(current_aoh) + calc_1 = (const_layer - current) + scenario + new_aoh = RasterLayer.empty_raster_layer_like(current) + calc_1.save(new_aoh) - calc_2 = (new_AOH / historic_AOH).numpy_apply(exponent_func_raster) + calc_2 = (new_aoh / historic_aoh).numpy_apply(exponent_func_raster) calc_2 = calc_2.numpy_apply(lambda chunk: np.where(chunk > 1, 1, chunk)) - new_p = RasterLayer.empty_raster_layer_like(new_AOH) + new_p = RasterLayer.empty_raster_layer_like(new_aoh) calc_2.save(new_p) return new_p @@ -101,16 +107,16 @@ def global_code_residents_pixel_ae( scenario = ConstantLayer(0.0) try: - historic_AOH = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum() - except FileNotFoundError as fnf: + historic_aoh = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, filename)).sum() + except FileNotFoundError: print(f"Failed to open historic layer {os.path.join(historic_aohs_path, filename)}") sys.exit() - if historic_AOH == 0.0: + if historic_aoh == 0.0: print(f"Historic AoH for {taxid} is zero, aborting") sys.exit() - # print(f"current: 
{current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_AOH.sum()}") + # print(f"current: {current.sum()}\nscenario: {scenario.sum()}\nhistoric: {historic_aoh.sum()}") layers = [current, scenario] union = RasterLayer.find_union(layers) @@ -120,12 +126,12 @@ def global_code_residents_pixel_ae( except ValueError: pass - current_AOH = current.sum() + current_aoh = current.sum() - new_p_layer = process_delta_p(current, scenario, current_AOH, historic_AOH, z_exponent_func_raster) + new_p_layer = process_delta_p(current, scenario, current_aoh, historic_aoh, z_exponent_func_raster) print(new_p_layer.sum()) - old_persistence = calc_persistence_value(current_AOH, historic_AOH, z_exponent_func_float) + old_persistence = calc_persistence_value(current_aoh, historic_aoh, z_exponent_func_float) print(old_persistence) calc = new_p_layer - ConstantLayer(old_persistence) @@ -140,16 +146,18 @@ def global_code_residents_pixel_ae( breeding_filename = f"{taxid}_{Season.BREEDING.name}.tif" try: - historic_AOH_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)).sum() - if historic_AOH_breeding == 0.0: + with RasterLayer.layer_from_file(os.path.join(historic_aohs_path, breeding_filename)) as aoh: + historic_aoh_breeding = aoh.sum() + if historic_aoh_breeding == 0.0: print(f"Historic AoH breeding for {taxid} is zero, aborting") sys.exit() except FileNotFoundError: print(f"Historic AoH for breeding {taxid} not found, aborting") sys.exit() try: - historic_AOH_non_breeding = RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)).sum() - if historic_AOH_non_breeding == 0.0: + with RasterLayer.layer_from_file(os.path.join(historic_aohs_path, nonbreeding_filename)) as aoh: + historic_aoh_non_breeding = aoh.sum() + if historic_aoh_non_breeding == 0.0: print(f"Historic AoH for non breeding {taxid} is zero, aborting") sys.exit() except FileNotFoundError: @@ -166,22 +174,22 @@ def global_code_residents_pixel_ae( try: current_breeding = open_layer_as_float64(os.path.join(current_aohs_path, breeding_filename)) - except FileNotFoundError as fnf: + except FileNotFoundError: print(f"Failed to open current breeding {os.path.join(current_aohs_path, breeding_filename)}") sys.exit() try: current_non_breeding = open_layer_as_float64(os.path.join(current_aohs_path, nonbreeding_filename)) - except FileNotFoundError as fnf: + except FileNotFoundError: print(f"Failed to open current non breeding {os.path.join(current_aohs_path, nonbreeding_filename)}") sys.exit() try: scenario_breeding = open_layer_as_float64(breeding_scenario_path) - except FileNotFoundError as fnf: + except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario scenario_breeding = ConstantLayer(0.0) try: scenario_non_breeding = open_layer_as_float64(non_breeding_scenario_path) - except FileNotFoundError as fnf: + except FileNotFoundError: # If there is a current but now scenario file it's because the species went extinct under the scenario scenario_non_breeding = ConstantLayer(0.0) @@ -193,17 +201,36 @@ def global_code_residents_pixel_ae( except ValueError: pass - current_AOH_breeding = current_breeding.sum() - persistence_breeding = calc_persistence_value(current_AOH_breeding, historic_AOH_breeding, z_exponent_func_float) + current_aoh_breeding = current_breeding.sum() + persistence_breeding = calc_persistence_value( + current_aoh_breeding, + historic_aoh_breeding, + z_exponent_func_float + ) - current_AOH_non_breeding = 
current_non_breeding.sum()
-    persistence_non_breeding = calc_persistence_value(current_AOH_non_breeding, historic_AOH_non_breeding, z_exponent_func_float)
+    current_aoh_non_breeding = current_non_breeding.sum()
+    persistence_non_breeding = calc_persistence_value(
+        current_aoh_non_breeding,
+        historic_aoh_non_breeding,
+        z_exponent_func_float
+    )
 
     old_persistence = (persistence_breeding ** 0.5) * (persistence_non_breeding ** 0.5)
 
-    new_p_breeding = process_delta_p(current_breeding, scenario_breeding, current_AOH_breeding, historic_AOH_breeding, z_exponent_func_raster)
-    new_p_non_breeding = process_delta_p(current_non_breeding, scenario_non_breeding, current_AOH_non_breeding, historic_AOH_non_breeding, z_exponent_func_raster)
-
+    new_p_breeding = process_delta_p(
+        current_breeding,
+        scenario_breeding,
+        current_aoh_breeding,
+        historic_aoh_breeding,
+        z_exponent_func_raster
+    )
+    new_p_non_breeding = process_delta_p(
+        current_non_breeding,
+        scenario_non_breeding,
+        current_aoh_non_breeding,
+        historic_aoh_non_breeding,
+        z_exponent_func_raster
+    )
     new_p_layer = (new_p_breeding ** 0.5) * (new_p_non_breeding ** 0.5)
 
     delta_p_layer = new_p_layer - ConstantLayer(old_persistence)
diff --git a/method.md b/method.md
index c9316bb..1f6af8d 100644
--- a/method.md
+++ b/method.md
@@ -313,21 +313,21 @@ For each species we use the AoH data to calculate the likelihood of extinction u
 
 ```shark-run:deltap
-python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/%{TAXA}/current/* \
-    --current_path /data/aohs/current/%{TAXA}/ \
-    --scenario_path /data/aohs/restore/%{TAXA}/ \
-    --historic_path /data/aohs/pnv/%{TAXA}/ \
-    --z %{CURVE} \
-    --output_path /data/deltap/restore/%{CURVE}/%{TAXA}/
+python3 ./deltap/global_code_residents_pixel.py --speciesdata /data/species-info/%{TAXA}/current/* \
+    --current_path /data/aohs/current/%{TAXA}/ \
+    --scenario_path /data/aohs/restore/%{TAXA}/ \
+    --historic_path /data/aohs/pnv/%{TAXA}/ \
+    --z %{CURVE} \
+    --output_path /data/deltap/restore/%{CURVE}/%{TAXA}/
 
 python3 ./utils/raster_sum.py --rasters_directory /data/deltap/restore/%{CURVE}/%{TAXA}/ --output /data/deltap_sum/restore/%{CURVE}/%{TAXA}.tif
 
-python3 ./deltap/global_code_residents_pixel_AE_128.py --speciesdata /data/species-info/%{TAXA}/current/* \
-    --current_path /data/aohs/current/%{TAXA}/ \
-    --scenario_path /data/aohs/arable/%{TAXA}/ \
-    --historic_path /data/aohs/pnv/%{TAXA}/ \
-    --z %{CURVE} \
-    --output_path /data/deltap/arable/%{CURVE}/%{TAXA}/
+python3 ./deltap/global_code_residents_pixel.py --speciesdata /data/species-info/%{TAXA}/current/* \
+    --current_path /data/aohs/current/%{TAXA}/ \
+    --scenario_path /data/aohs/arable/%{TAXA}/ \
+    --historic_path /data/aohs/pnv/%{TAXA}/ \
+    --z %{CURVE} \
+    --output_path /data/deltap/arable/%{CURVE}/%{TAXA}/
 
 python3 ./utils/raster_sum.py --rasters_directory /data/deltap/arable/%{CURVE}/%{TAXA}/ --output /data/deltap_sum/arable/%{CURVE}/%{TAXA}.tif
 ```
diff --git a/predictors/endemism.py b/predictors/endemism.py
index 297f775..88a1ce1 100644
--- a/predictors/endemism.py
+++ b/predictors/endemism.py
@@ -9,7 +9,6 @@ import time
 from glob import glob
 from multiprocessing import Manager, Process, Queue, cpu_count
-from typing import Set
 
 import numpy as np
 from osgeo import gdal
@@ -39,12 +38,16 @@ def stage_1_worker(
 
             aoh1 = rasters[0].sum()
             if aoh1 > 0.0:
-                season1 = rasters[0].numpy_apply(lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh1)))
+                season1 = rasters[0].numpy_apply(
+                    lambda a: 
np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh1)) + ) else: season1 = None aoh2 = rasters[1].sum() if aoh2 > 0.0: - season2 = rasters[1].numpy_apply(lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh2))) + season2 = rasters[1].numpy_apply( + lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh2)) + ) else: season2 = None @@ -63,7 +66,9 @@ def stage_1_worker( case 1: aoh = rasters[0].sum() if aoh > 0.0: - partial = rasters[0].numpy_apply(lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh))) + partial = rasters[0].numpy_apply( + lambda a: np.nan_to_num(np.log(np.where(a == 0, np.nan, a) / aoh)) + ) else: continue case _: @@ -111,7 +116,9 @@ def stage_2_worker( with RasterLayer.layer_from_file(path) as partial_raster: if merged_result is None: merged_result = RasterLayer.empty_raster_layer_like(partial_raster) - cleaned_raster = partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + cleaned_raster = partial_raster.numpy_apply( + lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0) + ) cleaned_raster.save(merged_result) else: merged_result.reset_window() @@ -129,7 +136,7 @@ def stage_2_worker( final = RasterLayer.empty_raster_layer_like(merged_result, filename=output_tif) merged_result.save(final) -def species_richness( +def endemism( aohs_dir: str, species_richness_path: str, output_path: str, @@ -162,8 +169,8 @@ def species_richness( for worker_process in workers: worker_process.start() - for species in species_rasters: - source_queue.put(species_rasters[species]) + for raster in species_rasters.items(): + source_queue.put(raster) for _ in range(len(workers)): source_queue.put(None) @@ -212,7 +219,11 @@ def species_richness( cleaned_species_richness = species_richness.numpy_apply(lambda a: np.where(a > 0, a, np.nan)) - with RasterLayer.empty_raster_layer_like(summed_proportion, filename=output_path, nodata=np.nan) as result: + with RasterLayer.empty_raster_layer_like( + summed_proportion, + filename=output_path, + nodata=np.nan + ) as result: calc = summed_proportion.numpy_apply(lambda a, b: np.exp(a / b), cleaned_species_richness) calc.parallel_save(result) @@ -250,7 +261,7 @@ def main() -> None: ) args = parser.parse_args() - species_richness( + endemism( args.aohs, args.species_richness, args.output, diff --git a/predictors/species_richness.py b/predictors/species_richness.py index 5dadd02..1c2e395 100644 --- a/predictors/species_richness.py +++ b/predictors/species_richness.py @@ -5,7 +5,6 @@ import time from glob import glob from multiprocessing import Manager, Process, Queue, cpu_count -from typing import Set import numpy as np from osgeo import gdal @@ -92,7 +91,9 @@ def stage_2_worker( merged_result.set_window_for_union(union) partial_raster.set_window_for_union(union) - calc = merged_result + (partial_raster.numpy_apply(lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0))) + calc = merged_result + (partial_raster.numpy_apply( + lambda chunk: np.nan_to_num(chunk, copy=False, nan=0.0)) + ) temp = RasterLayer.empty_raster_layer_like(merged_result) calc.save(temp) merged_result = temp @@ -133,8 +134,8 @@ def species_richness( for worker_process in workers: worker_process.start() - for species in species_rasters: - source_queue.put(species_rasters[species]) + for raster in species_rasters.items(): + source_queue.put(raster) for _ in range(len(workers)): source_queue.put(None) diff --git a/prepare-layers/make_area_map.py b/prepare-layers/make_area_map.py index b38d235..e6dba2a 100644 --- 
a/prepare-layers/make_area_map.py +++ b/prepare-layers/make_area_map.py @@ -41,7 +41,7 @@ def make_area_map( output_path: str ) -> None: pixels = [0,] * math.floor(90.0 / pixel_scale) - for i in range(len(pixels)): + for i in range(len(pixels)): # pylint: disable=C0200 y = (i + 0.5) * pixel_scale area = area_of_pixel(pixel_scale, y) pixels[i] = area @@ -60,7 +60,7 @@ def make_area_map( gdal.GDT_Float32, filename=output_path ) as res: - res._dataset.WriteArray(allpixels, 0, 0) + res._dataset.WriteArray(allpixels, 0, 0) # pylint: disable=W0212 def main() -> None: diff --git a/prepare-layers/make_current_map.py b/prepare-layers/make_current_map.py index 684bf37..cf53f3a 100644 --- a/prepare-layers/make_current_map.py +++ b/prepare-layers/make_current_map.py @@ -1,7 +1,7 @@ -import argparse +import argparse import itertools from typing import Dict, Optional -from multiprocessing import Pool, cpu_count, set_start_method +from multiprocessing import set_start_method import pandas as pd from alive_progress import alive_bar @@ -37,11 +37,11 @@ def make_current_map( map_preserve_code = list(itertools.chain.from_iterable([crosswalk[x] for x in IUCN_CODE_ARTIFICAL])) - def filter(a): - import numpy as np + def filter_data(a): + import numpy as np # pylint: disable=C0415 return np.where(np.isin(a, map_preserve_code), a, (np.floor(a / 100) * 100).astype(int)) - calc = current.numpy_apply(filter) + calc = current.numpy_apply(filter_data) with RasterLayer.empty_raster_layer_like( current, diff --git a/prepare-layers/make_restore_map.py b/prepare-layers/make_restore_map.py index b26b37b..0fadf75 100644 --- a/prepare-layers/make_restore_map.py +++ b/prepare-layers/make_restore_map.py @@ -1,4 +1,4 @@ -import argparse +import argparse import itertools import sys from typing import Dict, Optional diff --git a/prepare-species/cleaning.py b/prepare-species/cleaning.py deleted file mode 100644 index 718a92c..0000000 --- a/prepare-species/cleaning.py +++ /dev/null @@ -1,48 +0,0 @@ -import math - -import pandas as pd - -ELEVATION_MIN = -500 -ELEVATION_MAX = 9000 - -def tidy_data(row: pd.Series) -> pd.Series: - """Tidy up the data as per Busana et al""" - - # Lower elevation higher than upper elevation - if not pd.isna(row.elevation_lower) and not pd.isna(row.elevation_upper): - if row.elevation_lower > row.elevation_upper: - row.elevation_lower = ELEVATION_MIN - row.elevation_upper = ELEVATION_MAX - - # Missing lower and/or upper elevation - if pd.isna(row.elevation_lower): - row.elevation_lower = ELEVATION_MIN - if not pd.isna(row.elevation_upper) and row.elevation_upper < ELEVATION_MIN: - row.elevation_upper = ELEVATION_MAX - if pd.isna(row.elevation_upper): - row.elevation_upper = ELEVATION_MAX - if row.elevation_lower > ELEVATION_MAX: - row.elevation_lower = ELEVATION_MIN - - # Lower elevation < -500 and/or upper elevation > 9000 - row.elevation_lower = max(ELEVATION_MIN, row.elevation_lower) - row.elevation_upper = min(ELEVATION_MAX, row.elevation_upper) - - # Small difference (<50m) between lower and upper elevation - elevation_diff = row.elevation_upper - row.elevation_lower - if elevation_diff < 50: - spare = 50 - elevation_diff - adjust = math.ceil(spare / 2) - row.elevation_lower -= adjust - row.elevation_upper += adjust - - if row.elevation_lower < ELEVATION_MIN: - adjust = ELEVATION_MIN - row.elevation_lower - row.elevation_lower += adjust - row.elevation_upper += adjust - elif row.elevation_upper > ELEVATION_MAX: - adjust = row.elevation_upper - ELEVATION_MAX - row.elevation_lower -= adjust - 
row.elevation_upper -= adjust
-
-    return row
diff --git a/prepare-species/extract_species_psql.py b/prepare-species/extract_species_psql.py
index 3710545..a84eddc 100644
--- a/prepare-species/extract_species_psql.py
+++ b/prepare-species/extract_species_psql.py
@@ -1,4 +1,5 @@
 import argparse
+import importlib
 import logging
 import os
 from functools import partial
@@ -12,7 +13,7 @@ import shapely
 
 from postgis.psycopg import register
 
-from cleaning import tidy_data
+aoh_cleaning = importlib.import_module("aoh-calculator.cleaning")
 
 logger = logging.getLogger(__name__)
 logging.basicConfig()
@@ -105,7 +106,7 @@ def tidy_reproject_save(
     target_crs = src_crs #pyproj.CRS.from_string(target_projection)
 
     graw = gdf.loc[0].copy()
-    grow = tidy_data(graw)
+    grow = aoh_cleaning.tidy_data(graw)
     os.makedirs(output_directory_path, exist_ok=True)
     output_path = os.path.join(output_directory_path, f"{grow.id_no}_{grow.season}.geojson")
     res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry")
@@ -150,16 +151,17 @@ def process_row(
 
     habitats = {}
     for season, major_importance, habitat_values, systems in raw_habitats:
-        if season in ['passage', 'Passage']:
-            continue
-        elif season in ['resident', 'Resident', 'Seasonal Occurrence Unknown', 'unknown', None]:
-            season_code = 1
-        elif season in ['breeding', 'Breeding Season']:
-            season_code = 2
-        elif season in ['non-breeding', 'Non-Breeding Season']:
-            season_code = 3
-        else:
-            raise ValueError(f"Unexpected season {season} for {id_no}")
+        match season:
+            case 'passage' | 'Passage':
+                continue
+            case 'resident' | 'Resident' | 'Seasonal Occurrence Unknown' | 'unknown' | None:
+                season_code = 1
+            case 'breeding' | 'Breeding Season':
+                season_code = 2
+            case 'non-breeding' | 'Non-Breeding Season':
+                season_code = 3
+            case _:
+                raise ValueError(f"Unexpected season {season} for {id_no}")
 
         if systems is None:
             logger.debug("Dropping %s: no systems in DB", id_no)
@@ -171,10 +173,10 @@ def process_row(
         if habitat_values is None:
             logger.debug("Dropping %s: no habitats in DB", id_no)
             continue
-        habitat_set = set([x for x in habitat_values.split('|')])
+        habitat_set = set(habitat_values.split('|'))
         if len(habitat_set) == 0:
             continue
-        if any([x.startswith('7') for x in habitat_set]) and major_importance == 'Yes':
+        if any(x.startswith('7') for x in habitat_set) and major_importance == 'Yes':
             logger.debug("Dropping %s: Habitat 7 in habitat list", id_no)
             return
 
@@ -293,7 +295,7 @@ def process_row(
 def extract_data_per_species(
     classname: str,
     output_directory_path: str,
-    target_projection: Optional[str],
+    _target_projection: Optional[str],
 ) -> None:
 
     connection = psycopg2.connect(DB_CONFIG)
diff --git a/prepare-species/species_prep.py b/prepare-species/species_prep.py
deleted file mode 100644
index b4b23b8..0000000
--- a/prepare-species/species_prep.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import argparse
-import os
-from enum import Enum
-from typing import List, Optional, Any, Tuple
-
-import geopandas as gpd
-import pandas as pd
-from shapely.ops import transform
-from pyproj import Transformer, CRS
-# import pyshark # pylint: disable=W0611
-
-import seasonality
-from iucn_modlib.classes.Taxon import Taxon
-from iucn_modlib.factories import TaxonFactories
-
-from cleaning import tidy_data
-
-class Seasonality(Enum):
-    RESIDENT = "resident"
-    BREEDING = "breeding"
-    NONBREEDING = "nonbreeding"
-
-    @property
-    def iucn_seasons(self) -> Tuple:
-        if self.value == 'resident':
-            return ('Resident', 'Seasonal Occurrence Unknown')
-        elif self.value == 'breeding':
-            
return ('Resident', 'Breeding Season', 'Seasonal Occurrence Unknown') - elif self.value == 'nonbreeding': - return ('Resident', 'Non-Breeding Season', 'Seasonal Occurrence Unknown') - else: - raise NotImplementedError(f'Unhandled seasonlity value {self.value}') - - -def seasonality_for_species(species: Taxon, range_file: str) -> Set[str]: - og_seasons = set( - seasonality.habitatSeasonality(species) + - seasonality.rangeSeasonality(range_file, species.taxonid) - ) - if len(og_seasons) == 0: - return {} - seasons = {'resident'} - if len(og_seasons.difference({'resident'})) > 0: - seasons = {'breeding', 'nonbreeding'} - return seasons - - -def extract_data_per_species( - specieslist_path: str, - speciesdata_path: str, - iucn_data_batch: str, - target_projection: Optional[str], - output_directory_path: str, -) -> None: - os.makedirs(output_directory_path, exist_ok=True) - - species_list = pd.read_csv(specieslist_path, index_col=0) - batch = TaxonFactories.loadBatchSource(iucn_data_batch) - species_data = gpd.read_file(speciesdata_path) - - for species_id in species_list["taxid"]: - try: - species = TaxonFactories.TaxonFactoryRedListBatch(species_id, batch) - except IndexError: - # Some of the data in the batch needs tidy... - print(f'{species_id} not in batch') - continue - - seasonality_list = seasonality_for_species(species, speciesdata_path) - for seasonality in seasonality_list: - filename = f'{seasonality}-{species.taxonid}.geojson' - - - - - subset_of_interest = species_data[[ - "id_no", - "seasonal", - "elevation_lower", - "elevation_upper", - "full_habitat_code", - "geometry" - ]] - - - for _, raw in subset_of_interest.iterrows(): - row = tidy_data(raw) - if target_projection: - transformer = Transformer.from_crs(species_data.crs, CRS(target_projection)) - new_geom = transform(transformer.transform, row.geometry) - row.geometry = new_geom - output_path = os.path.join(output_directory_path, f"{row.id_no}_{row.seasonal}.geojson") - res = gpd.GeoDataFrame(row.to_frame().transpose(), crs=CRS(target_projection), geometry="geometry") - res.to_file(output_path, driver="GeoJSON") - -def main() -> None: - parser = argparse.ArgumentParser(description="Process agregate species data to per-species-per-season for LIFE.") - parser.add_argument( - '--species', - type=str, - help='Selected list of species for evaluation', - required=True, - dest="species_list", - ) - parser.add_argument( - '--rangedata', - type=str, - help="Processed species range data", - required=True, - dest="speciesdata_path", - ) - parser.add_argument( - '--iucnbatch', - type=str, - help="IUCN download batch", - required=True, - dest="iucn_data_batch", - ) - parser.add_argument( - '--projection', - type=str, - help="Target projection", - required=False, - dest="target_projection" - ) - parser.add_argument( - '--output', - type=str, - help='Directory where per species Geojson is stored', - required=True, - dest='output_directory_path', - ) - args = parser.parse_args() - - extract_data_per_species( - args.species_list, - args.speciesdata_path, - args.iucn_data_batch, - args.target_projection, - args.output_directory_path - ) - -if __name__ == "__main__": - main() diff --git a/scripts/run.sh b/scripts/run.sh index 79358ce..5bd3b17 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -94,7 +94,7 @@ python3 ./utils/persistencegenerator.py --input ${DATADIR}/species-info --datadi littlejohn -j 200 -o ${DATADIR}/aohbatch.log -c ${DATADIR}/aohbatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./aoh-calculator/aohcalc.py --force-habitat # Calculate 
the per species Delta P values -littlejohn -j 200 -o ${DATADIR}/persistencebatch.log -c ${DATADIR}/persistencebatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./deltap/global_code_residents_pixel_AE_128.py +littlejohn -j 200 -o ${DATADIR}/persistencebatch.log -c ${DATADIR}/persistencebatch.csv ${VIRTUAL_ENV}/bin/python3 -- ./deltap/global_code_residents_pixel.py for CURVE in "${CURVES[@]}" do diff --git a/tests/test_calculate.py b/tests/test_calculate.py deleted file mode 100644 index 498814f..0000000 --- a/tests/test_calculate.py +++ /dev/null @@ -1,80 +0,0 @@ -from typing import Any - -import numpy -import pytest -from osgeo import gdal - -from yirgacheffe import WGS_84_PROJECTION -from yirgacheffe.layers import YirgacheffeLayer, UniformAreaLayer -from yirgacheffe.window import Area, Window -import persistence - -class SingleValueLayer(YirgacheffeLayer): - """Mocked layer to make testing calc function easier""" - def __init__(self, value: Any, width: int, height: int): - self.value = value - area = Area( - left = -180.0, - top = 90.0, - right = 180.0, - bottom = -90.0 - ) - super().__init__(area, None, WGS_84_PROJECTION) - self._window = Window(0, 0, width, height) - - def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: - assert (xoffset + xsize) <= self.window.xsize - assert (yoffset + ysize) <= self.window.ysize - return numpy.ones((ysize, xsize)) * self.value - -@pytest.mark.parametrize( - "habitat,elevation,range,area,habitats,elevation_range,expected_area", - [ - (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 10000.0), 4.0), - (100, 1234.0, False, 4.0, [100, 200, 300], (0.0, 10000.0), 0.0), - (100, 1234.0, True, 4.0, [200, 300], (0.0, 10000.0), 0.0), - (100, 1234.0, True, 4.0, [100, 200, 300], (0.0, 100.0), 0.0), - # (100, 1234.0, True, numpy.nan, [100, 200, 300], (0.0, 10000.0), 0.0), - ] -) -def test_calculate_simple(habitat,elevation,range,area,habitats,elevation_range,expected_area): - habitat_layer = SingleValueLayer(habitat, 1, 1) - elevation_layer = SingleValueLayer(elevation, 1, 1) - range_layer = SingleValueLayer(range, 1, 1) - area_layer = SingleValueLayer(area, 1, 1) - - persistence.YSTEP = 1 - area = persistence._calculate_cpu( - range_layer, - habitat_layer, - habitats, - elevation_layer, - elevation_range, - area_layer, - None - ) - assert area == expected_area - -@pytest.mark.parametrize("step_size", [1, 2, 3, 9, 10, 11]) -def test_calculate_step_sizes(step_size): - habitat_layer = SingleValueLayer(100, 10, 10) - elevation_layer = SingleValueLayer(1234.0, 10, 10) - range_layer = SingleValueLayer(True, 10, 10) - - # we want a non uniform area to make this interesting - area_dataset = gdal.GetDriverByName('mem').Create('mem', 1, 10, 1, gdal.GDT_Float32, []) - area_dataset.SetGeoTransform([-180.0, 180.0, 0.0, 90.0, 0.0, -18.0]) - area_dataset.GetRasterBand(1).WriteArray(numpy.array([[float(x)] for x in range(1, 11)]), 0, 0) - area_layer = UniformAreaLayer(area_dataset) - - persistence.YSTEP = step_size - area = persistence._calculate_cpu( - range_layer, - habitat_layer, - [100, 200, 300], - elevation_layer, - (0.0, 10000.0), - area_layer, - None - ) - assert area == 550.0 diff --git a/utils/regression_plot.py b/utils/regression_plot.py index 432e5cd..4605926 100644 --- a/utils/regression_plot.py +++ b/utils/regression_plot.py @@ -88,4 +88,3 @@ def main() -> None: if __name__ == "__main__": main() - From c7e6c64ffb3f07d06c554f13d020995fb32eb308 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 8 Nov 2024 14:34:17 +0000 Subject: [PATCH 36/36] Add 
github action --- .github/workflows/python-package.yml | 39 ++++++++++++++++++++++++++++ requirements.txt | 4 ++- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/python-package.yml diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..4d3e559 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python CI + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + +jobs: + build: + runs-on: ubuntu-latest + container: ghcr.io/osgeo/gdal:ubuntu-small-3.9.3 + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + + steps: + - name: Install system + run: | + apt-get update -qqy + apt-get install -y git python3-pip libpq5 libpq-dev + - uses: actions/checkout@v4 + with: + submodules: 'true' + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + - name: Lint with pylint + run: | + python3 -m pylint deltap predictors prepare-layers prepare-species diff --git a/requirements.txt b/requirements.txt index 57a630e..caf5fc9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,12 +5,14 @@ pandas psutil scipy pyproj +psycopg2 +postgis scikit-image rasterio requests alive-progress -gdal[numpy] +gdal[numpy]==3.9.3 git+https://github.com/carboncredits/iucn_modlib.git git+https://github.com/carboncredits/yirgacheffe
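
A note for readers tracing the delta-p refactor in patch 35: the arithmetic in calc_persistence_value and process_delta_p reduces to a few lines of numpy. The sketch below is illustrative only — the AoH totals and per-pixel values are invented, and it assumes the curve selected via --z is the power law p = (AoH / historic AoH) ** z (the script also carries a Gompertz form, exp(-exp(A + B * x ** alpha)), via gen_gompertz); it is not a drop-in replacement for the script itself.

```python
import numpy as np

Z = 0.25  # example exponent; the pipeline sweeps several curves via --z

def persistence(aoh, historic_aoh):
    # p = (AoH / historic AoH) ** z, clamped at 1 as in calc_persistence_value
    p = (aoh / historic_aoh) ** Z
    return np.where(p > 1, 1, p)

current_aoh = 4000.0    # invented species-level AoH total
historic_aoh = 10000.0  # invented pre-impact (PNV) AoH total

current = np.array([1.0, 2.0, 0.5])   # invented per-pixel AoH, current habitat
scenario = np.array([1.5, 2.0, 0.0])  # invented per-pixel AoH, scenario habitat

# per-pixel "new" AoH if only that pixel changes, mirroring
# (ConstantLayer(current_aoh) - current) + scenario in process_delta_p
new_aoh = (current_aoh - current) + scenario

# delta-p: persistence under the scenario minus persistence today
delta_p = persistence(new_aoh, historic_aoh) - persistence(current_aoh, historic_aoh)
print(delta_p)  # positive where habitat is gained, negative where it is lost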
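
Similarly, the endemism predictor renamed in that patch sums per-species log(pixel AoH / total AoH) layers and finishes with exp(summed_proportion / species_richness); working in log space this way is a numerically safer route to the geometric mean of each species' share of its range that falls in the pixel. A toy check of that identity, with invented proportions:

```python
import numpy as np

# invented per-species proportions: pixel AoH / species' total AoH
proportions = np.array([0.5, 0.02, 0.1])

via_logs = np.exp(np.log(proportions).sum() / len(proportions))  # pipeline's formulation
direct = proportions.prod() ** (1.0 / len(proportions))          # geometric mean

assert np.isclose(via_logs, direct)
print(via_logs)  # 0.1 for these values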