Skip to content

Commit

Permalink
refactor: aggregate in python (#794)
Browse files Browse the repository at this point in the history
## 🔧 Problem

After computing the impacts in Python, we use a JS script
`computeAggregated.js` and an Elm file `ComputeAggregated.elm` to compute
the aggregated impacts (ecoscore and pef).

This leads to several problems:
- `computeAggregated.js` computes aggregated impacts for all domains
(textile, food, object) when we want to compute them for only 1 domain,
leading to unwanted diffs.
- `computeAggregated.js` changes the order of the attributes in the JSON,
leading to unwanted diffs.
- This splits the export across different files and different programming
languages, which makes no sense and adds a lot of complexity.

## 🍰 Solution

For each domain the export should be in 1 python file.

In this PR there is only export.py for food, as it is the only domain with
an export.py script right now.

## 🏝️ How to test

Running `make export_food` should produce no diff.

---------

Co-authored-by: paulboosz <[email protected]>
  • Loading branch information
ccomb and paulboosz authored Oct 15, 2024
1 parent 328466c commit cf7e2c1
Show file tree
Hide file tree
Showing 4 changed files with 3,081 additions and 2,968 deletions.
4 changes: 2 additions & 2 deletions data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ env | grep ECOBALYSE_DATA_DIR || echo "No ECOBALYSE_DATA_DIR in environment. Con
env | grep ECOBALYSE_DATA_DIR || exit
@if [ "$(shell docker container inspect -f '{{.State.Running}}' $(NAME) )" = "true" ]; then \
echo "(Using the existing container)" &&\
docker exec -u jovyan -it -e ECOBALYSE_DATA_DIR=/home/jovyan/ecobalyse-private/ -w /home/jovyan/ecobalyse/data $(NAME) $(1);\
docker exec -u jovyan -it -e ECOBALYSE_DATA_DIR=/home/jovyan/ecobalyse-private/ -e PYTHONPATH=. -w /home/jovyan/ecobalyse/data $(NAME) $(1);\
else \
echo "(Creating a new container)" &&\
docker run --rm -it -v $(NAME):/home/jovyan -v $$PWD/../:/home/jovyan/ecobalyse -v $$PWD/../../dbfiles/:/home/jovyan/dbfiles -v $(ECOBALYSE_DATA_DIR):/home/jovyan/ecobalyse-private -e ECOBALYSE_DATA_DIR=/home/jovyan/ecobalyse-private/ -w /home/jovyan/ecobalyse/data $(NAME) $(1); fi
Expand Down Expand Up @@ -41,7 +41,7 @@ delete_method:
@$(call DOCKER,python3 common/delete_methods.py)

# Export the food processes by running food/export.py through the DOCKER macro.
# Only one recipe line is kept: the earlier variant that also chained
# `npm run processes:build` ran the export a second time.
export_food:
	@$(call DOCKER,bash -c "python3 food/export.py")

compare_food:
@$(call DOCKER,python3 export.py compare)
Expand Down
106 changes: 104 additions & 2 deletions data/common/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import functools
import json
import logging
from copy import deepcopy

import bw2data
from bw2io.utils import activity_hash
Expand All @@ -22,16 +23,74 @@ def spproject(activity):
return "AGB3.1.1 2023-03-06"


def export_json(data, filename):
def remove_detailed_impacts(processes):
    """Return copies of the processes with every detailed impact zeroed.

    Each process is deep-copied, so the input objects are left untouched.
    In each copy, every entry of ``"impacts"`` other than ``"pef"`` and
    ``"ecs"`` is set to ``0``; the keys themselves are all kept.
    """
    aggregated_keys = ("pef", "ecs")
    stripped = []
    for original in processes:
        clone = deepcopy(original)
        impacts = clone["impacts"]
        # Only values are overwritten (no key is added or removed), so
        # mutating while iterating is safe here.
        for name in impacts:
            if name in aggregated_keys:
                continue
            impacts[name] = 0
        stripped.append(clone)
    return stripped


def export_json_ordered(data, filename):
    """
    Export data to a JSON file, with added newline at the end.
    Make sure to sort impacts in the json file

    If ``data`` is a list, each element that is a mapping with an
    ``"impacts"`` key gets that entry reordered with ``sort_impacts``.
    If ``data`` is a dict, the same is applied to each of its values.
    Anything else (including elements that are not mappings) is written
    unchanged. The input ``data`` itself is never modified.
    """
    from collections.abc import Mapping

    print(f"Exporting {filename}")

    def with_sorted_impacts(entry):
        # Guard on Mapping: `"impacts" in entry` would raise on numbers and
        # would be a substring test on strings.
        if isinstance(entry, Mapping) and "impacts" in entry:
            return {**entry, "impacts": sort_impacts(entry["impacts"])}
        return entry

    if isinstance(data, list):
        sorted_data = [with_sorted_impacts(item) for item in data]
    elif isinstance(data, dict):
        sorted_data = {
            key: with_sorted_impacts(value) for key, value in data.items()
        }
    else:
        sorted_data = data

    with open(filename, "w", encoding="utf-8") as file:
        # Write the document exactly once: dumping both the raw and the
        # sorted data into the same file would produce invalid JSON.
        json.dump(sorted_data, file, indent=2, ensure_ascii=False)
        file.write("\n")  # Add a newline at the end of the file
    print(f"\nExported {len(data)} elements to {filename}")


def sort_impacts(impacts):
    """Return a new dict with the impacts arranged in the export order.

    Only keys listed in the fixed order below are copied over; any other
    key present in ``impacts`` is left out of the result. The input
    mapping is not modified.
    """
    # Fixed key order of the exported file: detailed impacts first, the
    # two aggregated scores ("pef", "ecs") last.
    export_order = (
        "acd",
        "cch",
        "etf",
        "etf-c",
        "fru",
        "fwe",
        "htc",
        "htc-c",
        "htn",
        "htn-c",
        "ior",
        "ldu",
        "mru",
        "ozd",
        "pco",
        "pma",
        "swe",
        "tre",
        "wtu",
        "pef",
        "ecs",
    )
    ordered = {}
    for name in export_order:
        if name in impacts:
            ordered[name] = impacts[name]
    return ordered


def load_json(filename):
"""
Load JSON data from a file.
Expand Down Expand Up @@ -94,6 +153,49 @@ def with_corrected_impacts(impacts_ecobalyse, processes_fd, impacts_key="impacts
return frozendict(processes_updated)


def with_aggregated_impacts(impacts_ecobalyse, processes_fd, impacts_key="impacts"):
    """Return the processes with "pef" and "ecs" aggregated impacts added.

    For each impact definition in ``impacts_ecobalyse`` whose ``"ecoscore"``
    (resp. ``"pef"``) entry is not ``None``, a factor
    ``weighting / normalization`` is pre-computed. Every process in
    ``processes_fd`` is then shallow-copied, its ``impacts_key`` mapping is
    copied, and the ``"pef"`` then ``"ecs"`` entries are set from
    ``calculate_aggregate``. The result is a new ``frozendict`` keyed like
    ``processes_fd``.
    """

    def factors_for(score):
        # One factor per impact that defines this score.
        return {
            name: definition[score]["weighting"] / definition[score]["normalization"]
            for name, definition in impacts_ecobalyse.items()
            if definition[score] is not None
        }

    ecs_factors = factors_for("ecoscore")
    pef_factors = factors_for("pef")

    aggregated = {}
    for process_id, process in processes_fd.items():
        process_copy = dict(process)
        impacts = process_copy[impacts_key].copy()

        # "pef" is written before "ecs" is computed, as in the original order.
        impacts["pef"] = calculate_aggregate(impacts, pef_factors)
        impacts["ecs"] = calculate_aggregate(impacts, ecs_factors)

        process_copy[impacts_key] = impacts
        aggregated[process_id] = process_copy

    return frozendict(aggregated)


def calculate_aggregate(process_impacts, normalization_factors):
    """Return the weighted sum of the process impacts, expressed in µPts.

    For every impact named in ``normalization_factors``, the process value
    (``0`` when the process lacks that impact) is multiplied by its factor
    and by 10**6; the products are summed.
    """
    to_micro_points = 10**6  # scale the result to µPts
    return sum(
        to_micro_points * process_impacts.get(name, 0) * factor
        for name, factor in normalization_factors.items()
    )


def display_changes(key, oldprocesses, processes):
"""Display a nice sorted table of impact changes to review
key is the field to display (id for food, uuid for textile)"""
Expand Down
27 changes: 19 additions & 8 deletions data/food/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
from common.export import (
cached_search,
display_changes,
export_json,
export_json_ordered,
load_json,
progress_bar,
remove_detailed_impacts,
spproject,
with_aggregated_impacts,
with_corrected_impacts,
with_subimpacts,
)
Expand Down Expand Up @@ -57,7 +59,8 @@
"FEED_FILE": f"{PROJECT_ROOT_DIR}/data/food/ecosystemic_services/feed.json",
"UGB_FILE": f"{PROJECT_ROOT_DIR}/data/food/ecosystemic_services/ugb.csv",
"INGREDIENTS_FILE": f"{PROJECT_ROOT_DIR}/public/data/food/ingredients.json",
"PROCESSES_FILE": f"{ECOBALYSE_DATA_DIR}/data/food/processes_impacts.json",
"PROCESSES_IMPACTS": f"{ECOBALYSE_DATA_DIR}/data/food/processes_impacts.json",
"PROCESSES_AGGREGATED": f"{PROJECT_ROOT_DIR}/public/data/food/processes.json",
"LAND_OCCUPATION_METHOD": ("selected LCI results", "resource", "land occupation"),
"GRAPH_FOLDER": f"{PROJECT_ROOT_DIR}/data/food/impact_comparison",
}
Expand Down Expand Up @@ -180,8 +183,7 @@ def process_activity_for_processes(activity):
"system_description": cached_search(
activity.get("database", AGRIBALYSE), activity["search"]
)["System description"],
"category": activity.get("category"),
"categories": activity.get("categories"),
"categories": activity.get("process_categories"),
"comment": (
prod[0]["comment"]
if (
Expand Down Expand Up @@ -382,7 +384,7 @@ def csv_export_impact_comparison(compared_impacts):
bw2data.config.p["biosphere_database"] = CONFIG["BIOSPHERE"]

# keep the previous processes with old impacts
oldprocesses = load_json(CONFIG["PROCESSES_FILE"])
oldprocesses = load_json(CONFIG["PROCESSES_IMPACTS"])
activities = tuple(load_json(CONFIG["ACTIVITIES_FILE"]))

activities_land_occ = compute_land_occupation(activities)
Expand Down Expand Up @@ -422,10 +424,19 @@ def csv_export_impact_comparison(compared_impacts):
processes_corrected_impacts = with_corrected_impacts(
IMPACTS_DEF_ECOBALYSE, processes_impacts
)
processes_aggregated_impacts = with_aggregated_impacts(
IMPACTS_DEF_ECOBALYSE, processes_corrected_impacts
)

# Export

export_json(activities_land_occ, CONFIG["ACTIVITIES_FILE"])
export_json(ingredients_animal_es, CONFIG["INGREDIENTS_FILE"])
export_json_ordered(activities_land_occ, CONFIG["ACTIVITIES_FILE"])
export_json_ordered(ingredients_animal_es, CONFIG["INGREDIENTS_FILE"])
display_changes("id", oldprocesses, processes_corrected_impacts)
export_json(list(processes_corrected_impacts.values()), CONFIG["PROCESSES_FILE"])
export_json_ordered(
list(processes_aggregated_impacts.values()), CONFIG["PROCESSES_IMPACTS"]
)
export_json_ordered(
remove_detailed_impacts(list(processes_aggregated_impacts.values())),
CONFIG["PROCESSES_AGGREGATED"],
)
Loading

0 comments on commit cf7e2c1

Please sign in to comment.