Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update species filters #13

Merged
merged 4 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,7 @@ jobs:
python -m pip install -r requirements.txt
- name: Lint with pylint
run: |
python3 -m pylint deltap predictors prepare-layers prepare-species
python3 -m pylint deltap predictors prepare_layers prepare_species
- name: Tests
run: |
python3 -m pytest ./tests
2 changes: 1 addition & 1 deletion predictors/endemism.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def endemism(

species_rasters = {}
for raster_path in aohs:
speciesid = os.path.splitext(os.path.basename(raster_path))[0]
speciesid = os.path.basename(raster_path).split('_')[0]
full_path = os.path.join(aohs_dir, raster_path)
try:
species_rasters[speciesid].add(full_path)
Expand Down
2 changes: 1 addition & 1 deletion predictors/species_richness.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def species_richness(

species_rasters = {}
for raster_path in aohs:
speciesid = os.path.splitext(os.path.basename(raster_path))[0]
speciesid = os.path.basename(raster_path).split('_')[0]
full_path = os.path.join(aohs_dir, raster_path)
try:
species_rasters[speciesid].add(full_path)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
from functools import partial
from multiprocessing import Pool
from typing import Optional, Tuple
from typing import Dict, List, Optional, Set, Tuple

# import pyshark # pylint: disable=W0611
import geopandas as gpd
Expand Down Expand Up @@ -113,26 +113,12 @@ def tidy_reproject_save(
res_projected = res.to_crs(target_crs)
res_projected.to_file(output_path, driver="GeoJSON")

def process_habitats(
habitats_data: List,
) -> Dict:

def process_row(
output_directory_path: str,
presence: Tuple[int],
row: Tuple,
) -> None:

connection = psycopg2.connect(DB_CONFIG)
register(connection)
cursor = connection.cursor()


id_no, assessment_id, elevation_lower, elevation_upper = row

cursor.execute(HABITATS_STATEMENT, (assessment_id,))
raw_habitats = cursor.fetchall()

if len(raw_habitats) == 0:
logger.debug("Dropping %s as no habitats found", id_no)
return
if len(habitats_data) == 0:
raise ValueError("No habitats found")

# Clean up habitats to ensure they're unique (the system agg in the SQL statement might duplicate them)
# In the database there are the following seasons:
Expand All @@ -148,52 +134,53 @@ def process_row(
# unknown
# null

habitats = {}
for season, major_importance, habitat_values, systems in raw_habitats:
habitats : Dict[Set[str]] = {}
major_habitats_lvl_1 : Dict[Set[int]] = {}
for season, major_importance, habitat_values, systems in habitats_data:

match season:
case 'passage', 'Passage':
case 'passage' | 'Passage':
continue
case 'resident', 'Resident', 'Seasonal Occurrence Unknown', 'unknown', None:
case 'resident' | 'Resident' | 'Seasonal Occurrence Unknown' | 'unknown' | None:
season_code = 1
case 'breeding', 'Breeding Season':
case 'breeding' | 'Breeding Season':
season_code = 2
case 'non-breeding', 'Non-Breeding Season':
case 'non-breeding' | 'Non-Breeding Season':
season_code = 3
case _:
raise ValueError(f"Unexpected season {season} for {id_no}")
raise ValueError(f"Unexpected season {season}")

if systems is None:
logger.debug("Dropping %s: no systems in DB", id_no)
continue
if "Marine" in systems:
logger.debug("Dropping %s: marine in systems", id_no)
return
raise ValueError("Marine in systems")

if habitat_values is None:
logger.debug("Dropping %s: no habitats in DB", id_no)
continue
habitat_set = set(habitat_values.split('|'))
if len(habitat_set) == 0:
continue
if any(x.startswith('7') for x in habitat_set) and major_importance == 'Yes':
logger.debug("Dropping %s: Habitat 7 in habitat list", id_no)
return

try:
habitats[season_code] |= habitat_set
except KeyError:
habitats[season_code] = habitat_set
habitats[season_code] = habitat_set | habitats.get(season_code, set())

if major_importance == 'Yes':
major_habitats_lvl_1[season_code] = \
{int(float(x)) for x in habitat_set} | major_habitats_lvl_1.get(season_code, set())

# habitat based filtering
if len(habitats) == 0:
logger.debug("Dropping %s: No habitats", id_no)
return
raise ValueError("No filtered habitats")

cursor.execute(GEOMETRY_STATEMENT, (assessment_id, presence))
geometries_data = cursor.fetchall()
for _, major_habitats in major_habitats_lvl_1.items():
if any((x == 7) for x in major_habitats):
raise ValueError("Habitat 7 in major importance habitat list")

return habitats

def process_geometries(geometries_data: List[Tuple[int,shapely.Geometry]]) -> Dict[int,shapely.Geometry]:
if len(geometries_data) == 0:
logger.info("Dropping %s: no geometries", id_no)
return
raise ValueError("No geometries")

geometries = {}
for season, geometry in geometries_data:
grange = shapely.normalize(shapely.from_wkb(geometry.to_ewkb()))
Expand All @@ -211,6 +198,36 @@ def process_row(
except KeyError:
geometries[season_code] = grange

return geometries

def process_row(
output_directory_path: str,
presence: Tuple[int],
row: Tuple,
) -> None:

connection = psycopg2.connect(DB_CONFIG)
register(connection)
cursor = connection.cursor()

id_no, assessment_id, elevation_lower, elevation_upper = row

cursor.execute(HABITATS_STATEMENT, (assessment_id,))
habitats_data = cursor.fetchall()
try:
habitats = process_habitats(habitats_data)
except ValueError as exc:
logging.info("Dropping %s: %s", id_no, str(exc))
return

cursor.execute(GEOMETRY_STATEMENT, (assessment_id, presence))
geometries_data = cursor.fetchall()
try:
geometries = process_geometries(geometries_data)
except ValueError as exc:
logging.info("Dropping %s: %s", id_no, str(exc))
return

seasons = set(geometries.keys()) | set(habitats.keys())

if seasons == {1}:
Expand Down
File renamed without changes.
21 changes: 12 additions & 9 deletions scripts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,24 @@ python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/pnv_raw
--output ${DATADIR}/habitat_maps/pnv/

# Generate an area scaling map
python3 ./prepare-layers/make_area_map.py --scale 0.016666666666667 --output ${DATADIR}/habitat/area-per-pixel.tif
python3 ./prepare_layers/make_area_map.py --scale 0.016666666666667 --output ${DATADIR}/habitat/area-per-pixel.tif

# Generate the arable scenario map
python3 ./prepare-layers/make_arable_map.py --current ${DATADIR}/habitat/current_raw.tif \
python3 ./prepare_layers/make_arable_map.py --current ${DATADIR}/habitat/current_raw.tif \
--output ${DATADIR}/habitat/arable.tif

python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/arable.tif \
--scale 0.016666666666667 \
--output ${DATADIR}/habitat_maps/arable/

python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \
python3 ./prepare_layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \
--scenario ${DATADIR}/habitat/arable.tif \
--area ${DATADIR}/area-per-pixel.tif \
--scale 0.016666666666667 \
--output ${DATADIR}/habitat/arable_diff_area.tif

# Generate the restore map
python3 ./prepare-layers/make_restore_map.py --pnv ${DATADIR}/habitat/pnv_raw.tif \
python3 ./prepare_layers/make_restore_map.py --pnv ${DATADIR}/habitat/pnv_raw.tif \
--current ${DATADIR}/habitat/current_raw.tif \
--crosswalk ${DATADIR}/crosswalk.csv \
--output ${DATADIR}/habitat/restore.tif
Expand All @@ -69,7 +69,7 @@ python3 ./aoh-calculator/habitat_process.py --habitat ${DATADIR}/habitat/restore
--scale 0.016666666666667 \
--output ${DATADIR}/habitat_maps/restore/

python3 ./prepare-layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \
python3 ./prepare_layers/make_diff_map.py --current ${DATADIR}/habitat/current_raw.tif \
--scenario ${DATADIR}/habitat/restore.tif \
--area ${DATADIR}/area-per-pixel.tif \
--scale 0.016666666666667 \
Expand All @@ -81,10 +81,10 @@ gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r max -co CO
gdalwarp -t_srs EPSG:4326 -tr 0.016666666666667 -0.016666666666667 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 ${DATADIR}/elevation.tif ${DATADIR}/elevation-min-1k.tif

# Get species data per taxa from IUCN data
python3 ./prepare-species/extract_species_psql.py --class AVES --output ${DATADIR}/species-info/AVES/ --projection "EPSG:4326"
python3 ./prepare-species/extract_species_psql.py --class AMPHIBIA --output ${DATADIR}/species-info/AMPHIBIA/ --projection "EPSG:4326"
python3 ./prepare-species/extract_species_psql.py --class MAMMALIA --output ${DATADIR}/species-info/MAMMALIA/ --projection "EPSG:4326"
python3 ./prepare-species/extract_species_psql.py --class REPTILIA --output ${DATADIR}/species-info/REPTILIA/ --projection "EPSG:4326"
python3 ./prepare_species/extract_species_psql.py --class AVES --output ${DATADIR}/species-info/AVES/ --projection "EPSG:4326"
python3 ./prepare_species/extract_species_psql.py --class AMPHIBIA --output ${DATADIR}/species-info/AMPHIBIA/ --projection "EPSG:4326"
python3 ./prepare_species/extract_species_psql.py --class MAMMALIA --output ${DATADIR}/species-info/MAMMALIA/ --projection "EPSG:4326"
python3 ./prepare_species/extract_species_psql.py --class REPTILIA --output ${DATADIR}/species-info/REPTILIA/ --projection "EPSG:4326"

# Generate the batch job input CSVs
python3 ./utils/speciesgenerator.py --input ${DATADIR}/species-info --datadir ${DATADIR} --output ${DATADIR}/aohbatch.csv
Expand Down Expand Up @@ -134,3 +134,6 @@ do
done

python3 ./predictors/species_richness.py --aohs_folder ${DATADIR}/aohs/current/ --output ${DATADIR}/predictors/species_richness.tif
python3 ./predictors/endemism.py --aohs_folder ${DATADIR}/aohs/current/ \
--species_richness ${DATADIR}/predictors/species_richness.tif \
--output ${DATADIR}/predictors/
127 changes: 127 additions & 0 deletions tests/test_species_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import pytest
import postgis

from prepare_species.extract_species_psql import process_habitats, process_geometries

@pytest.mark.parametrize("label", [
"resident",
"Resident",
"unknown",
"Seasonal Occurrence Unknown",
None
])
def test_simple_resident_species_habitat_filter(label):
habitat_data = [
(label, "Yes", "4.1|4.2", "Terrestrial"),
(label, "No", "4.3", "Terrestrial"),
]
res = process_habitats(habitat_data)

assert list(res.keys()) == [1]
assert res[1] == set(["4.1", "4.2", "4.3"])

@pytest.mark.parametrize("breeding_label,non_breeding_label", [
("breeding", "non-breeding"),
("Breeding Season", "non-breeding"),
("breeding", "Non-Breeding Season"),
("Breeding Season", "Non-Breeding Season"),
])
def test_simple_migratory_species_habitat_filter(breeding_label, non_breeding_label):
habitat_data = [
(breeding_label, "Yes", "4.1|4.2", "Terrestrial"),
(non_breeding_label, "No", "4.3", "Terrestrial"),
]
res = process_habitats(habitat_data)

assert list(res.keys()) == [2, 3]
assert res[2] == set(["4.1", "4.2"])
assert res[3] == set(["4.3"])

def test_reject_if_marine_in_system():
habitat_data = [
("resident", "Yes", "4.1|4.2", "Terrestrial"),
("resident", "No", "4.3", "Terrestrial|Marine"),
]
with pytest.raises(ValueError):
_ = process_habitats(habitat_data)

def test_reject_if_caves_in_major_habitat():
habitat_data = [
("resident", "Yes", "4.1|7.2", "Terrestrial"),
("resident", "No", "4.3", "Terrestrial"),
]
with pytest.raises(ValueError):
_ = process_habitats(habitat_data)

def test_do_not_reject_if_caves_in_minor_habitat():
habitat_data = [
("resident", "Yes", "4.1|4.2", "Terrestrial"),
("resident", "No", "7.3", "Terrestrial"),
]
res = process_habitats(habitat_data)

# Just resident
assert list(res.keys()) == [1]
assert res[1] == set(["4.1", "4.2", "7.3"])

@pytest.mark.parametrize("label", [
"passage",
"Passage",
])
def test_passage_habitat_ignored(label):
habitat_data = [
("resident", "Yes", "4.1|4.2", "Terrestrial"),
(label, "No", "4.3", "Terrestrial"),
]
res = process_habitats(habitat_data)

# Just resident
assert list(res.keys()) == [1]
assert res[1] == set(["4.1", "4.2"])

def test_fail_no_habitats_before_filter():
habitat_data = []
with pytest.raises(ValueError):
_ = process_habitats(habitat_data)

def test_fail_no_habitats_after_filter():
habitat_data = [
("passage", "Yes", "4.1|7.2", "Terrestrial"),
]
with pytest.raises(ValueError):
_ = process_habitats(habitat_data)

def test_fail_if_unrecognised_season_for_habitat():
habitat_data = [
("resident", "Yes", "4.1|4.2", "Terrestrial"),
("zarquon", "Yes", "4.3", "Terrestrial"),
]
with pytest.raises(ValueError):
_ = process_habitats(habitat_data)

def test_empty_geometry_list():
geoemetries_data = []
with pytest.raises(ValueError):
_ = process_geometries(geoemetries_data)


@pytest.mark.parametrize("label", [
1,
5
])
def test_simple_resident_species_geometry_filter(label):
habitat_data = [
(label, postgis.Geometry.from_ewkb("000000000140000000000000004010000000000000")),
]
res = process_geometries(habitat_data)

assert list(res.keys()) == [1]

def test_simple_migratory_species_geometry_filter():
habitat_data = [
(2, postgis.Geometry.from_ewkb("000000000140000000000000004010000000000000")),
(3, postgis.Geometry.from_ewkb("000000000140000000000000004010000000000000")),
]
res = process_geometries(habitat_data)

assert list(res.keys()) == [2, 3]
Loading