Skip to content

Commit

Permalink
Add a notebook to compute risk rankings
Browse files Browse the repository at this point in the history
  • Loading branch information
clkruse committed Dec 5, 2022
1 parent 77fb917 commit 88b58f5
Showing 1 changed file with 243 additions and 0 deletions.
243 changes: 243 additions & 0 deletions notebooks/risk-ranking.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Generate Site Risk Scores"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"import json\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import requests\n",
"from tqdm.notebook import tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#endpoint = 'https://api.plastic.watch.earthrise.media/sites'\n",
"#response = requests.get(endpoint, params={'limit':5000})\n",
"#data = response.json()\n",
"#gdf = gpd.GeoDataFrame.from_features(data['features'])\n",
"# convert gdf columns to float if possible\n",
"gdf = gpd.read_file('/Users/ckruse/Downloads/all_vietnam_metadata.geojson')\n",
"for col in gdf.columns:\n",
" try:\n",
" gdf[col] = gdf[col].astype(float)\n",
" except:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"variable_classes = ['Population - 1 km', 'Soil Clay Fraction', 'Distance to Waterway (m)', 'area']\n",
"stats = {}\n",
"for var in variable_classes:\n",
" stats[var] = {}\n",
" data = gdf[gdf[var] != -1][var]\n",
" print(\"min\", data.min(), \"max\", data.max())\n",
" if var == 'Soil Clay Fraction':\n",
" pass\n",
" elif var == 'Distance to Waterway (m)':\n",
" data = np.log(data + 10)\n",
" else:\n",
" data = np.log(data+0.1)\n",
" plt.hist(data, bins=40)\n",
" plt.title(var)\n",
" plt.show()\n",
" stdev = np.std(data)\n",
" mean = np.mean(data)\n",
" stats[var]['mean'] = mean\n",
" stats[var]['std'] = stdev\n",
"stats"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Frozen stats. We'd use these if we want to run risk calculations on site ingest in the API\n",
"stats = {'Population - 1 km': {'mean': 7.417273534217326, 'std': 1.7791091219232005},\n",
" 'Soil Clay Fraction': {'mean': -0.885915538147242, 'std': 0.15838979230699146},\n",
" 'Distance to Waterway (m)': {'mean': 6.390775048206517, 'std': 1.179343111730756},\n",
" 'area': {'mean': 7.154989549277127, 'std': 1.9026796861924273}}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(gdf)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"variable_classes = ['Population - 1 km', 'Soil Clay Fraction', 'Distance to Waterway (m)', 'area']\n",
"import matplotlib.pyplot as plt\n",
"risk_score = []\n",
"for site_index in range(len(gdf)):\n",
" site = gdf.iloc[site_index]\n",
" site_deviation = {}\n",
" if site['area'] != 0.0 and site['Distance to Waterway (m)'] != -1:\n",
" #print(site['area'], site['area'])\n",
" for var in variable_classes:\n",
" if var == 'Soil Clay Fraction':\n",
" data = site[var].astype('float')\n",
" print(data)\n",
" elif var == 'Distance to Waterway (m)':\n",
" data = site[var].astype('float')\n",
" data = np.log(data + 10)\n",
" else:\n",
" data = np.log(site[var].astype('float') + 0.01)\n",
" #print(var, data)\n",
" #print('average', var, stats[var]['mean'])\n",
" #print('Deviation', (data - stats[var]['mean']) / stats[var]['std'])\n",
" site_deviation[var] = (data - stats[var]['mean']) / stats[var]['std']\n",
" risk = ((61 * site_deviation['area'] + 21 * site_deviation['Population - 1 km']) - (69 * site_deviation['Distance to Waterway (m)'] + 41 * site_deviation['Soil Clay Fraction'])) / (69 + 61 + 41 + 21)\n",
" risk_score.append(risk)\n",
" else:\n",
" risk_score.append(np.nan)\n",
"risk_score = np.array(risk_score)\n",
"print(len(risk_score))\n",
"plt.hist(risk_score, bins=30)\n",
"plt.title('Raw Risk Scores')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"normed_risk = (risk_score - np.nanmin(risk_score)) / (np.nanmax(risk_score) - np.nanmin(risk_score))\n",
"plt.hist(normed_risk[normed_risk > -5], bins=35)\n",
"plt.title('Normed Risk Scores')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"new_gdf = gdf.copy()\n",
"new_gdf['risk'] = normed_risk"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sort new_gdf by id\n",
"new_gdf = new_gdf.sort_values('id')\n",
"new_gdf['number'] = [i + 1 for i in range(len(new_gdf))]\n",
"new_gdf.to_file('/Users/ckruse/Documents/earthrise/vietnam-waste/data/vietnam-v1.0-metadata.geojson', driver='GeoJSON')\n",
"new_gdf.to_csv('/Users/ckruse/Documents/earthrise/vietnam-waste/data/vietnam-v1.0-metadata.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"study_sites = gpd.read_file('/Users/ckruse/Documents/earthrise/vietnam-waste/data/v1.0_detections_within_territories_0.25_buffer.geojson')\n",
"study_sites = study_sites.sort_values('id')\n",
"# use the id of the study site to find the risk score\n",
"study_sites['risk'] = new_gdf[new_gdf['id'].isin(study_sites['id'])]['risk'].values\n",
"# do the same for area\n",
"study_sites['area'] = new_gdf[new_gdf['id'].isin(study_sites['id'])]['area'].values\n",
"# do the same for number\n",
"study_sites['number'] = new_gdf[new_gdf['id'].isin(study_sites['id'])]['number'].values\n",
"study_sites.to_file('/Users/ckruse/Documents/earthrise/vietnam-waste/data/v1.0_detections_within_territories_0.25_buffer_risk.geojson', driver='GeoJSON')\n",
"study_sites.to_csv('/Users/ckruse/Documents/earthrise/vietnam-waste/data/v1.0_detections_within_territories_0.25_buffer_risk.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# write out new_gdf to csv\n",
"new_gdf.to_csv('/Users/ckruse/Downloads/all_vietnam_metadata_risk.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"endpoint = 'http://api.plastic.watch.earthrise.media/sites/'\n",
"for site in tqdm(new_gdf.iterfeatures(), total=(len(new_gdf))):\n",
" site_id = site['properties']['id']\n",
" site_endpoint = f\"{endpoint}{site_id}\"\n",
" site['id'] = site_id\n",
" r = requests.put(site_endpoint, json.dumps(site))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "d425afa4a959a86aa036beaa1a58ff3469f38e31f3ec97f5785c695b9108eced"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit ('m1-plastics': conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 88b58f5

Please sign in to comment.