def create_products_as_new_nodes(data: List[dict]) -> List[dict]:
    """Create new product nodes and link functional edges to them if needed.

    A new ``product`` node is created for an edge when all of the following
    conditions are met:

    * The edge is functional (``edge.get("functional")`` is truthy)
    * The edge is unlinked (``edge.get("input")`` is falsey)
    * The edge has a ``name``, and that ``name`` differs from the dataset
      ``name``
    * The combination of ``name`` and ``location`` is not present among the
      nodes already in ``data``. If the edge has no ``location``, the
      ``location`` of its dataset is used instead.

    The originating edge is linked to the new node by setting its ``input``.
    Further qualifying edges with the same ``name``/``location`` combination
    are linked to the node created for the first one instead of being left
    unlinked. Edges whose combination matches a node that was already present
    in ``data`` are left untouched.

    Edge attributes not listed in ``EDGE_CORE_COLUMNS`` are copied to the new
    node, but they never override the attributes generated here (``code``,
    ``exchanges``, ...), so the stored link always matches the node.

    Modifies ``data`` in-place, and returns the modified ``data``.

    Raises
    ------
    KeyError
        If a qualifying edge belongs to a dataset without a ``database``.

    """
    existing = {(ds.get("name"), ds.get("location")) for ds in data}
    # (name, location) -> (database, code) for nodes created during this call
    created = {}
    nodes = []

    for ds in data:
        # `or []` also covers datasets whose `exchanges` value is `None`
        for edge in ds.get("exchanges") or []:
            qualifies = (
                edge.get("functional")
                and not edge.get("input")
                and edge.get("name")
                and edge["name"] != ds.get("name")
            )
            if not qualifies:
                continue

            if not ds.get("database"):
                raise KeyError("""
Can't create a new `product` node, as dataset is missing `database` attribute:
{}""".format(pformat(ds)))

            key = (edge["name"], edge.get("location") or ds.get("location"))
            if key in created:
                # Same product as one we already created: reuse that node
                edge["input"] = created[key]
                continue
            if key in existing:
                # A matching node was given in the input data; not our job
                continue

            code = uuid4().hex
            extras = {
                k: v for k, v in edge.items() if k not in EDGE_CORE_COLUMNS
            }
            # Generated attributes go last so edge metadata can't clobber them
            nodes.append(
                extras
                | {
                    "name": edge["name"],
                    "location": key[1] or bd.config.global_location,
                    "unit": edge.get("unit") or ds.get("unit"),
                    "exchanges": [],
                    "code": code,
                    "type": bd.labels.product_node_default,
                    "database": ds["database"],
                }
            )
            created[key] = (ds["database"], code)
            edge["input"] = created[key]

    data.extend(nodes)
    return data
def test_create_products_as_new_nodes_basic():
    """A qualifying edge produces a new product node and is linked to it."""
    beta_edge = {
        "name": "beta",
        "unit": "kg",
        "location": "here",
        "functional": True,
        "type": "technosphere",
        "extra": True,
    }
    data = [
        {"name": "epsilon", "location": "there"},
        {"name": "alpha", "database": "foo", "exchanges": [beta_edge]},
    ]
    expected = deepcopy(data)

    result = create_products_as_new_nodes(data)

    assert len(data) == 3
    expected[1]["exchanges"][0]["input"] = (
        result[2]["database"],
        result[2]["code"],
    )
    assert result[:2] == expected[:2]
    # Extra edge attributes are carried over; core ones are regenerated
    assert result[2] == {
        "database": "foo",
        "code": result[2]["code"],
        "name": "beta",
        "unit": "kg",
        "location": "here",
        "exchanges": [],
        "type": bd.labels.product_node_default,
        "extra": True,
    }


def test_create_products_as_new_nodes_skip_nonqualifying():
    """Only the first edge qualifies; every other edge is left untouched."""
    qualifying = {
        "name": "beta",
        "unit": "kg",
        "location": "here",
        "functional": True,
        "type": "technosphere",
        "extra": True,
    }
    nameless = {
        "unit": "kg",
        "location": "here",
        "functional": True,
        "type": "technosphere",
        "extra": True,
    }
    nonfunctional = {
        "name": "gamma",
        "unit": "kg",
        "location": "here",
        "functional": False,
        "type": "production",
        "extra": True,
    }
    already_linked = {
        "name": "delta",
        "unit": "kg",
        "location": "here",
        "functional": True,
        "type": "technosphere",
        "input": ("foo", "bar"),
    }
    matches_existing_node = {
        "name": "epsilon",
        "unit": "kg",
        "location": "there",
        "functional": True,
        "type": "technosphere",
    }
    data = [
        {"name": "epsilon", "location": "there"},
        {
            "name": "alpha",
            "database": "foo",
            "exchanges": [
                qualifying,
                nameless,
                nonfunctional,
                already_linked,
                matches_existing_node,
            ],
        },
    ]
    expected = deepcopy(data)

    result = create_products_as_new_nodes(data)

    assert len(data) == 3
    expected[1]["exchanges"][0]["input"] = (
        result[2]["database"],
        result[2]["code"],
    )
    assert result[:2] == expected[:2]
    assert result[2]["name"] == "beta"


def test_create_products_as_new_nodes_duplicate_exchanges():
    """Two edges for the same product yield a single new node."""
    exchanges = [
        {
            "name": "beta",
            "unit": "kg",
            "location": "here",
            "functional": True,
            "type": "technosphere",
            "extra": True,
            "amount": amount,
        }
        for amount in (7, 17)
    ]
    data = [{"name": "alpha", "database": "foo", "exchanges": exchanges}]

    result = create_products_as_new_nodes(data)

    assert len(data) == 2
    assert result[1]["name"] == "beta"


def test_create_products_as_new_nodes_inherit_process_location():
    """An edge without `location` takes the location of its dataset."""
    process = {
        "name": "alpha",
        "database": "foo",
        "location": "here",
        "exchanges": [
            {
                "name": "beta",
                "unit": "kg",
                "functional": True,
                "type": "technosphere",
                "extra": True,
            }
        ],
    }
    data = [process]

    result = create_products_as_new_nodes(data)

    assert len(data) == 2
    assert result[1] == {
        "database": "foo",
        "code": result[1]["code"],
        "name": "beta",
        "unit": "kg",
        "location": "here",
        "exchanges": [],
        "type": bd.labels.product_node_default,
        "extra": True,
    }


def test_create_products_as_new_nodes_inherit_process_unit():
    """An edge without `unit` takes the unit of its dataset."""
    process = {
        "name": "alpha",
        "database": "foo",
        "unit": "kg",
        "exchanges": [
            {
                "name": "beta",
                "location": "here",
                "functional": True,
                "type": "technosphere",
                "extra": True,
            }
        ],
    }
    data = [process]

    result = create_products_as_new_nodes(data)

    assert len(data) == 2
    assert result[1] == {
        "database": "foo",
        "code": result[1]["code"],
        "name": "beta",
        "unit": "kg",
        "location": "here",
        "exchanges": [],
        "type": bd.labels.product_node_default,
        "extra": True,
    }


def test_create_products_as_new_nodes_inherit_process_location_when_searching():
    """The dataset location is also used when looking for existing nodes."""
    existing_product = {"name": "beta", "location": "here"}
    process = {
        "name": "alpha",
        "database": "foo",
        "location": "here",
        "exchanges": [
            {
                "name": "beta",
                "unit": "kg",
                "functional": True,
                "type": "technosphere",
                "extra": True,
            }
        ],
    }
    data = [existing_product, process]

    create_products_as_new_nodes(data)

    # No new node: ("beta", "here") is already present in the data
    assert len(data) == 2


def test_create_products_as_new_nodes_get_default_global_location():
    """With no location on edge or dataset, the global default is used."""
    process = {
        "name": "alpha",
        "database": "foo",
        "exchanges": [
            {
                "name": "beta",
                "unit": "kg",
                "functional": True,
                "type": "technosphere",
                "extra": True,
            }
        ],
    }
    data = [process]

    result = create_products_as_new_nodes(data)

    assert len(data) == 2
    assert result[1] == {
        "database": "foo",
        "code": result[1]["code"],
        "name": "beta",
        "unit": "kg",
        "location": bd.config.global_location,
        "exchanges": [],
        "type": bd.labels.product_node_default,
        "extra": True,
    }


def test_create_products_as_new_nodes_dataset_must_have_database_key():
    """A qualifying edge in a dataset without `database` raises `KeyError`."""
    process = {
        "name": "alpha",
        "exchanges": [
            {
                "name": "beta",
                "unit": "kg",
                "functional": True,
                "type": "technosphere",
            }
        ],
    }
    with pytest.raises(KeyError):
        create_products_as_new_nodes([process])