refactor: aggregate in python (#794)

## 🔧 Problem

After computing the impacts in Python, we use a JS script `computeAggregated.js` and an Elm file `ComputeAggregated.elm` to compute the aggregated impacts (ecoscore and pef). This leads to several problems:

- `computeAggregated.js` computes aggregated impacts for all domains (textile, food, object) even when we only want to compute them for one domain, leading to unwanted diffs.
- `computeAggregated.js` changes the order of the attributes in the JSON, leading to unwanted diffs.
- This splits the export across different files and different programming languages, which makes no sense and adds a lot of complexity.

## 🍰 Solution

For each domain, the export should live in a single Python file. This PR only contains `export.py` for food, as it is the only domain with an `export.py` script right now.

## 🏝️ How to test

Running `make export_food` should produce no diff.

---------

Co-authored-by: paulboosz <[email protected]>
MTES-MCT · Oct 15, 2024 · cf7e2c1 · cf7e2c1
1 parent 328466c
commit cf7e2c1
Show file tree

Hide file tree

Showing 4 changed files with 3,081 additions and 2,968 deletions.
diff --git a/data/Makefile b/data/Makefile
@@ -9,7 +9,7 @@ env | grep ECOBALYSE_DATA_DIR || echo "No ECOBALYSE_DATA_DIR in environment. Con
 env | grep ECOBALYSE_DATA_DIR || exit
 @if [ "$(shell docker container inspect -f '{{.State.Running}}' $(NAME) )" = "true" ]; then \
   echo "(Using the existing container)" &&\
-	docker exec -u jovyan -it -e ECOBALYSE_DATA_DIR=/home/jovyan/ecobalyse-private/ -w /home/jovyan/ecobalyse/data $(NAME) $(1);\
+	docker exec -u jovyan -it -e ECOBALYSE_DATA_DIR=/home/jovyan/ecobalyse-private/ -e PYTHONPATH=. -w /home/jovyan/ecobalyse/data $(NAME) $(1);\
 else \
 	echo "(Creating a new container)" &&\
   docker run --rm -it -v $(NAME):/home/jovyan -v $$PWD/../:/home/jovyan/ecobalyse -v $$PWD/../../dbfiles/:/home/jovyan/dbfiles -v $(ECOBALYSE_DATA_DIR):/home/jovyan/ecobalyse-private -e ECOBALYSE_DATA_DIR=/home/jovyan/ecobalyse-private/ -w /home/jovyan/ecobalyse/data $(NAME) $(1); fi
@@ -41,7 +41,7 @@ delete_method:
 	@$(call DOCKER,python3 common/delete_methods.py)
 
 export_food:
-	@$(call DOCKER,bash -c "python3 food/export.py && npm run processes:build")
+	@$(call DOCKER,bash -c "python3 food/export.py")
 
 compare_food:
 	@$(call DOCKER,python3 export.py compare)

diff --git a/data/common/export.py b/data/common/export.py
@@ -2,6 +2,7 @@
 import functools
 import json
 import logging
+from copy import deepcopy
 
 import bw2data
 from bw2io.utils import activity_hash
@@ -22,16 +23,74 @@ def spproject(activity):
             return "AGB3.1.1 2023-03-06"
 
 
-def export_json(data, filename):
+def remove_detailed_impacts(processes):
+    result = list()
+    for process in processes:
+        new_process = deepcopy(process)
+        for k in new_process["impacts"].keys():
+            if k not in ("pef", "ecs"):
+                new_process["impacts"][k] = 0
+        result.append(new_process)
+    return result
+
+
+def export_json_ordered(data, filename):
     """
     Export data to a JSON file, with added newline at the end.
+    Make sure to sort impacts in the json file
     """
+    print(f"Exporting {filename}")
+    if isinstance(data, list):
+        sorted_data = [
+            {**item, "impacts": sort_impacts(item["impacts"])}
+            if "impacts" in item
+            else item
+            for item in data
+        ]
+    elif isinstance(data, dict):
+        sorted_data = {
+            key: {**value, "impacts": sort_impacts(value["impacts"])}
+            if "impacts" in value
+            else value
+            for key, value in data.items()
+        }
+    else:
+        sorted_data = data
+
     with open(filename, "w", encoding="utf-8") as file:
-        json.dump(data, file, indent=2, ensure_ascii=False)
+        json.dump(sorted_data, file, indent=2, ensure_ascii=False)
         file.write("\n")  # Add a newline at the end of the file
     print(f"\nExported {len(data)} elements to {filename}")
 
 
+def sort_impacts(impacts):
+    # Define the desired order of impact keys
+    impact_order = [
+        "acd",
+        "cch",
+        "etf",
+        "etf-c",
+        "fru",
+        "fwe",
+        "htc",
+        "htc-c",
+        "htn",
+        "htn-c",
+        "ior",
+        "ldu",
+        "mru",
+        "ozd",
+        "pco",
+        "pma",
+        "swe",
+        "tre",
+        "wtu",
+        "pef",
+        "ecs",
+    ]
+    return {key: impacts[key] for key in impact_order if key in impacts}
+
+
 def load_json(filename):
     """
     Load JSON data from a file.
@@ -94,6 +153,49 @@ def with_corrected_impacts(impacts_ecobalyse, processes_fd, impacts_key="impacts
     return frozendict(processes_updated)
 
 
+def with_aggregated_impacts(impacts_ecobalyse, processes_fd, impacts_key="impacts"):
+    """Add aggregated impacts to the processes"""
+
+    # Pre-compute normalization factors
+    normalization_factors = {
+        "ecs": {
+            k: v["ecoscore"]["weighting"] / v["ecoscore"]["normalization"]
+            for k, v in impacts_ecobalyse.items()
+            if v["ecoscore"] is not None
+        },
+        "pef": {
+            k: v["pef"]["weighting"] / v["pef"]["normalization"]
+            for k, v in impacts_ecobalyse.items()
+            if v["pef"] is not None
+        },
+    }
+
+    processes_updated = {}
+    for key, process in processes_fd.items():
+        updated_process = dict(process)
+        updated_impacts = updated_process[impacts_key].copy()
+
+        updated_impacts["pef"] = calculate_aggregate(
+            updated_impacts, normalization_factors["pef"]
+        )
+        updated_impacts["ecs"] = calculate_aggregate(
+            updated_impacts, normalization_factors["ecs"]
+        )
+
+        updated_process[impacts_key] = updated_impacts
+        processes_updated[key] = updated_process
+
+    return frozendict(processes_updated)
+
+
+def calculate_aggregate(process_impacts, normalization_factors):
+    # We multiply by 10**6 to get the result in µPts
+    return sum(
+        10**6 * process_impacts.get(impact, 0) * normalization_factors.get(impact, 0)
+        for impact in normalization_factors
+    )
+
+
 def display_changes(key, oldprocesses, processes):
     """Display a nice sorted table of impact changes to review
     key is the field to display (id for food, uuid for textile)"""

diff --git a/data/food/export.py b/data/food/export.py
@@ -19,10 +19,12 @@
 from common.export import (
     cached_search,
     display_changes,
-    export_json,
+    export_json_ordered,
     load_json,
     progress_bar,
+    remove_detailed_impacts,
     spproject,
+    with_aggregated_impacts,
     with_corrected_impacts,
     with_subimpacts,
 )
@@ -57,7 +59,8 @@
     "FEED_FILE": f"{PROJECT_ROOT_DIR}/data/food/ecosystemic_services/feed.json",
     "UGB_FILE": f"{PROJECT_ROOT_DIR}/data/food/ecosystemic_services/ugb.csv",
     "INGREDIENTS_FILE": f"{PROJECT_ROOT_DIR}/public/data/food/ingredients.json",
-    "PROCESSES_FILE": f"{ECOBALYSE_DATA_DIR}/data/food/processes_impacts.json",
+    "PROCESSES_IMPACTS": f"{ECOBALYSE_DATA_DIR}/data/food/processes_impacts.json",
+    "PROCESSES_AGGREGATED": f"{PROJECT_ROOT_DIR}/public/data/food/processes.json",
     "LAND_OCCUPATION_METHOD": ("selected LCI results", "resource", "land occupation"),
     "GRAPH_FOLDER": f"{PROJECT_ROOT_DIR}/data/food/impact_comparison",
 }
@@ -180,8 +183,7 @@ def process_activity_for_processes(activity):
         "system_description": cached_search(
             activity.get("database", AGRIBALYSE), activity["search"]
         )["System description"],
-        "category": activity.get("category"),
-        "categories": activity.get("categories"),
+        "categories": activity.get("process_categories"),
         "comment": (
             prod[0]["comment"]
             if (
@@ -382,7 +384,7 @@ def csv_export_impact_comparison(compared_impacts):
     bw2data.config.p["biosphere_database"] = CONFIG["BIOSPHERE"]
 
     # keep the previous processes with old impacts
-    oldprocesses = load_json(CONFIG["PROCESSES_FILE"])
+    oldprocesses = load_json(CONFIG["PROCESSES_IMPACTS"])
     activities = tuple(load_json(CONFIG["ACTIVITIES_FILE"]))
 
     activities_land_occ = compute_land_occupation(activities)
@@ -422,10 +424,19 @@ def csv_export_impact_comparison(compared_impacts):
     processes_corrected_impacts = with_corrected_impacts(
         IMPACTS_DEF_ECOBALYSE, processes_impacts
     )
+    processes_aggregated_impacts = with_aggregated_impacts(
+        IMPACTS_DEF_ECOBALYSE, processes_corrected_impacts
+    )
 
     # Export
 
-    export_json(activities_land_occ, CONFIG["ACTIVITIES_FILE"])
-    export_json(ingredients_animal_es, CONFIG["INGREDIENTS_FILE"])
+    export_json_ordered(activities_land_occ, CONFIG["ACTIVITIES_FILE"])
+    export_json_ordered(ingredients_animal_es, CONFIG["INGREDIENTS_FILE"])
     display_changes("id", oldprocesses, processes_corrected_impacts)
-    export_json(list(processes_corrected_impacts.values()), CONFIG["PROCESSES_FILE"])
+    export_json_ordered(
+        list(processes_aggregated_impacts.values()), CONFIG["PROCESSES_IMPACTS"]
+    )
+    export_json_ordered(
+        remove_detailed_impacts(list(processes_aggregated_impacts.values())),
+        CONFIG["PROCESSES_AGGREGATED"],
+    )