diff --git a/activity_browser/bwutils/multilca.py b/activity_browser/bwutils/multilca.py index 759c3da9f..e2ddafa85 100644 --- a/activity_browser/bwutils/multilca.py +++ b/activity_browser/bwutils/multilca.py @@ -1,4 +1,5 @@ from collections import OrderedDict +from copy import deepcopy from typing import Iterable, Optional, Union from logging import getLogger @@ -415,6 +416,7 @@ def _build_dict( rev_dict: dict, limit: int, limit_type: str, + total_range: bool, ) -> dict: """Sort the given contribution array on method or reference flow column. @@ -433,15 +435,32 @@ def _build_dict( """ topcontribution_dict = dict() for fu_or_method, col in FU_M_index.items(): + contribution_col = contributions[col, :] + if total_range: # total is based on the range + total = np.abs(contribution_col).sum() + else: # total is based on the score + total = contribution_col.sum() + top_contribution = ca.sort_array( - contributions[col, :], limit=limit, limit_type=limit_type + contribution_col, limit=limit, limit_type=limit_type, total=total + ) + + # split and calculate remaining rest sections for positive and negative part + pos_rest = ( + np.sum(contribution_col[contribution_col > 0]) + - np.sum(top_contribution[top_contribution[:, 0] > 0][:, 0]) ) + neg_rest = ( + np.sum(contribution_col[contribution_col < 0]) + - np.sum(top_contribution[top_contribution[:, 0] < 0][:, 0]) + ) + cont_per = OrderedDict() cont_per.update( { - ("Total", ""): contributions[col, :].sum(), - ("Rest", ""): contributions[col, :].sum() - - top_contribution[:, 0].sum(), + ("Total", ""): total, + ("Rest (+)", ""): pos_rest, + ("Rest (-)", ""): neg_rest, } ) for value, index in top_contribution: @@ -544,12 +563,12 @@ def join_df_with_metadata( if special_keys: # replace index keys with labels - try: # first put Total and Rest to the first two positions in the dataframe + try: # first put Total, Rest (+) and Rest (-) to the first three positions in the dataframe complete_index = special_keys + keys joined = 
joined.reindex(complete_index, axis="index", fill_value=0.0) except: log.error( - "Could not put Total and Rest on positions 0 and 1 in the dataframe." + "Could not put 'Total', 'Rest (+)' and 'Rest (-)' on positions 0, 1 and 2 in the dataframe." ) joined.index = cls.get_labels(joined.index, fields=x_fields) return joined @@ -583,18 +602,20 @@ def get_labelled_contribution_dict( # If the cont_dict has tuples for keys, coerce df.columns into MultiIndex if all(isinstance(k, tuple) for k in cont_dict.keys()): df.columns = pd.MultiIndex.from_tuples(df.columns) - special_keys = [("Total", ""), ("Rest", "")] - + special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")] # replace all 0 values with NaN and drop all rows with only NaNs - # EXCEPT for the special keys - df.index = ids_to_keys(df.index) - index = ( - df.loc[df.index.difference(special_keys)] - .replace(0, np.nan) - .dropna(how="all") - .index.union(special_keys) - ) - df = df.loc[index] + df = df.replace(0, np.nan) + + # sort on absolute mean of a row + df_bot = deepcopy(df.loc[df.index.difference(special_keys)].dropna(how="all")) + + func = lambda row: np.nanmean(np.abs(row)) + if len(df_bot) > 1: # but only sort if there is something to sort + df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1) + df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True) + del df_bot["_sort_me_"] + + df = pd.concat([df.iloc[:3, :], df_bot], axis=0) if not mask: joined = self.join_df_with_metadata( @@ -617,7 +638,7 @@ def adjust_table_unit(df: pd.DataFrame, method: Optional[tuple]) -> pd.DataFrame """Given a dataframe, adjust the unit of the table to either match the given method, or not exist.""" if "unit" not in df.columns: return df - keys = df.index[~df["index"].isin({"Total", "Rest"})] + keys = df.index[~df["index"].isin({"Total", "Rest (+)", "Rest (-)"})] unit = bd.Method(method).metadata.get("unit") if method else "unit" df.loc[keys, "unit"] = unit return df @@ -791,6 
+812,7 @@ def top_elementary_flow_contributions( limit: int = 5, normalize: bool = False, limit_type: str = "number", + total_range: bool = True, **kwargs, ) -> pd.DataFrame: """Return top EF contributions for either functional_unit or method. @@ -807,6 +829,7 @@ def top_elementary_flow_contributions( limit : The number of top contributions to consider normalize : Determines whether or not to normalize the contribution values limit_type : The type of limit, either 'number' or 'percent' + total_range : Whether the total for contributions is based on the range (True) or the score (False) Returns ------- @@ -830,7 +853,7 @@ def top_elementary_flow_contributions( contributions = self.normalize(contributions) top_cont_dict = self._build_dict( - contributions, index, rev_index, limit, limit_type + contributions, index, rev_index, limit, limit_type, total_range ) labelled_df = self.get_labelled_contribution_dict( top_cont_dict, x_fields=x_fields, y_fields=y_fields, mask=mask @@ -846,6 +869,7 @@ def top_process_contributions( limit: int = 5, normalize: bool = False, limit_type: str = "number", + total_range: bool = True, **kwargs, ) -> pd.DataFrame: """Return top process contributions for functional_unit or method. 
@@ -885,7 +909,7 @@ def top_process_contributions( contributions = self.normalize(contributions) top_cont_dict = self._build_dict( - contributions, index, rev_index, limit, limit_type + contributions, index, rev_index, limit, limit_type, total_range ) labelled_df = self.get_labelled_contribution_dict( top_cont_dict, x_fields=x_fields, y_fields=y_fields, mask=mask diff --git a/activity_browser/docs/wiki/LCA-Results.md b/activity_browser/docs/wiki/LCA-Results.md index ebbfce3d4..5acf69858 100644 --- a/activity_browser/docs/wiki/LCA-Results.md +++ b/activity_browser/docs/wiki/LCA-Results.md @@ -14,8 +14,8 @@ ## Contribution Analysis ### Differences between approaches -Activity Browser has two contribution analysis approaches available to assess results, -`Elementary Flow (EF) Contributions` and `Process contributions`. +Activity Browser has three contribution analysis approaches available to assess results, +`Elementary Flow (EF) Contributions`, `Process contributions` and `First Tier (FT) Contributions`. Before we discuss the different approaches, we introduce a small example for the production of _'steel'_: @@ -45,9 +45,10 @@ For the system and functional unit above, this would be: The _contribution matrix_ show the dis-aggregated results for each individual biosphere flow for each activity. -#### EF contributions +#### Elementary Flow (EF) contributions If we take sum the _rows_ to one row, we get the EF contributions (the contribution of all CO2 and CH4 impacts together). + In the case above, the EF contributions are: - CO2: 1.5404... (96.3%) - CH4: 0.0596... (3.7%) @@ -55,27 +56,59 @@ In the case above, the EF contributions are: #### Process contributions If we take the sum of the _columns_ to one column, we get the process contributions (the contribution of all coal, electricity and steel production impacts together). + In the case above, the process contributions are: - coal production: 0.0596... (3.7%) -- electricity production: 0.5404... 
(62.5%) -- steel production: 1 (33.8%) +- electricity production: 0.5404... (33.8%) +- steel production: 1 (62.5%) To summarize, the difference between EF and process contributions is the direction the contribution matrix is summed. +#### First Tier (FT) contributions +The FT contributions take a very different approach, instead of calculating the impact of processes anywhere in the +system, FT contributions are the process of the functional unit and all its inputs. +By calculating the impact of the inputs to the functional unit, the impacts are accumulated. +In the example above this would mean that the impact of _'coal'_ is calculated from only the coal needed directly by +_'steel production'_, the impact from coal produced for _'electricity production'_ would be included in the +_'electricity'_. +Together with the _direct_ impact from _'steel production'_, this is the _first tier_. + +This approach becomes more useful when using large systems to accumulate impacts into relevant parts of your foreground +system. + +Activity Browser calculates these impacts by applying _partial LCAs_ (LCA on part of the functional unit) on the inputs, +scaled to the functional unit. + +In the case above, the FT contributions are: +- coal: 0.0298... (1.9%) +- electricity: 0.5702... (35.6%) +- steel production: 1 (62.5%) + +Note that we now use the names of the products _'coal'_ and _'electricity'_ as we now assess the impacts of these inputs, +not the processes. + +Note also how the impact of _'steel production'_ is unchanged, as this still shows the _direct_ impact, but that the +impact of _'electricity'_ is higher than _'electricity production'_ in the process contributions. +This is due to the fact that we include all impacts in the production of electricity, not just the _direct_ impacts. +However, these results are compensated by a lower impact of _'coal'_ (compared to process contributions of +_'coal production'_). +The total impact is still 1.6. 
+ ### Manipulating results In this section we generalize a little bit for the different contribution approaches, -we call the _from_ part of the contributions (the EFs or activities above) _entities_. +we call the _from_ part of the contributions (the EFs or activities or FT above) _entities_. There are several ways Activity Browser manipulates your results by default. - The results are **sorted** so that the row with the largest (absolute) average values are shown first. -- A `cut-off` of 5% is applied, this only shows results that contribute at least 5% to the total result, - all other entities are grouped into a `Rest` group. +- A `cut-off` of 5% is applied, this only shows results that contribute at least 5% to the total range of results, + all other entities are grouped into a `Rest (+)` or `Rest (-)` group. - The contributions are _normalized_ to the impact of that reference flow, meaning they are show as a percentage, counting up to 100% for every item you compare. These actions are taken to show you the most relevant results. -You can manually manipulate the contribution results in the next menu, which we explain bit by bit below. +You can manually manipulate the contribution results in the menu shown below, which we will explain bit by bit +in the next sections. ![contributions cutoff](./assets/contribution_manipulation.png) #### Cut-off @@ -84,7 +117,8 @@ The `Relative` mode shows contributions _from_ entities of _x_% or higher. The `Top #` mode shows contributions from the _x_ entities that contribute the most (as absolute). You can adjust the `Cut-off level` to change how many results you see. -All results that don't make the cut-off will be grouped into the `Rest` group. +All results that don't make the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups. +The Rest groups are only present when there are positive or negative numbers remaining for the respective rest groups. 
 #### Compare The `Compare` menu allows you to compare different dimensions of results. @@ -106,8 +140,29 @@ By default, Activity Browser shows a plot and a table. You can disable one of them if you want to focus on one of them. #### Relative and Absolute -Finally, you can choose between `Relative` and `Absolute` results. -The `Relative` results will sum to 100%, the `Absolute` results will sum to the impact score. +You can choose between `Relative` and `Absolute` results. +The `Relative` results will sum to 100% (the total score), the `Absolute` results will sum to the impact score. + +#### Range and Score +If the Cut-off type is `Relative`, you can choose between `Range` and `Score`. +This determines what you use as the _total_ to which the relative contributions are counted. +For `Range`, this is the full _range_ of results, for example, if all your negative results together have a score of -2 +and all your positive results together have a score of 10, the _range_ is 12 (-2 * -1 + 10). +For `Score`, this is the total score (sum) of the results, for example, if all your negative results together have a +score of -2 and all your positive results together have a score of 10, the _score_ is 8 (-2 + 10). +The `Range` or `Score` setting is only used when 1) your Cut-off type is `Relative` +and 2) your results contain both positive and negative results. + +### Positive and negative numbers in contribution results +It can happen in LCA that you get both positive and negative numbers in your contribution results. +Some of the reasons for this could be negative characterization factors, flows with negative numbers or using substitution flows. + +When there are both positive and negative numbers in the result, Activity Browser will show a marker to indicate +where the total score is, and show positive and negative contributions to the impact separately. 
+ +Below is a simple example (with unrealistic values) to demonstrate this: + +![CA example with positive and negative results](./assets/ca_positive_negative_example.png) ## Sankey The `Sankey` tab shows results from [graph traversal](https://docs.brightway.dev/projects/graphtools/en/latest/index.html). @@ -125,6 +180,8 @@ The `calculation depth` will stop traversing the supply chain once that number o In the Sankey, the red arrows show the _cumulative_ impact of the _product_ flow (_direct_ from that process and _indirect_ from all upstream processes involved in producing that product), the boxes show the _direct_ (process contribution) impact of that process. +Effectively, the sankey graph is the First Tier contribution analysis, repeated for every activity you see in the graph, +making it _n-tier_ contributions. Using the example above in the [contribution analysis](#contribution-analysis) section, we show the sankey below. The [process contribution](#process-contributions) results are also shown in the boxes below. 
diff --git a/activity_browser/docs/wiki/assets/ca_positive_negative_example.png b/activity_browser/docs/wiki/assets/ca_positive_negative_example.png new file mode 100644 index 000000000..9818c1aa3 Binary files /dev/null and b/activity_browser/docs/wiki/assets/ca_positive_negative_example.png differ diff --git a/activity_browser/docs/wiki/assets/contribution_manipulation.png b/activity_browser/docs/wiki/assets/contribution_manipulation.png index eae46912a..d10a004bb 100644 Binary files a/activity_browser/docs/wiki/assets/contribution_manipulation.png and b/activity_browser/docs/wiki/assets/contribution_manipulation.png differ diff --git a/activity_browser/layouts/tabs/LCA_results_tabs.py b/activity_browser/layouts/tabs/LCA_results_tabs.py index 3df4cae44..811c22d62 100644 --- a/activity_browser/layouts/tabs/LCA_results_tabs.py +++ b/activity_browser/layouts/tabs/LCA_results_tabs.py @@ -5,10 +5,13 @@ """ from collections import namedtuple +from copy import deepcopy from typing import List, Optional, Union from logging import getLogger +import numpy as np import pandas as pd + from PySide2 import QtCore, QtGui from PySide2.QtWidgets import (QApplication, QButtonGroup, QCheckBox, QComboBox, QFileDialog, QGridLayout, QGroupBox, @@ -16,7 +19,12 @@ QPushButton, QRadioButton, QScrollArea, QTableView, QTabWidget, QToolBar, QVBoxLayout, QWidget) + +from activity_browser.bwutils import AB_metadata + from stats_arrays.errors import InvalidParamsError +import bw2data as bd +import bw2analyzer as ba from activity_browser import signals from activity_browser.mod.bw2data import calculation_setups @@ -33,6 +41,8 @@ from ...ui.widgets import CutoffMenu, SwitchComboBox from .base import BaseRightTab +ca = ba.ContributionAnalysis() + log = getLogger(__name__) @@ -42,6 +52,16 @@ def get_header_layout(header_text: str) -> QVBoxLayout: vlayout.addWidget(horizontal_line()) return vlayout +def get_header_layout_w_help(header_text: str, help_widget) -> QVBoxLayout: + hlayout = QHBoxLayout() 
+ hlayout.addWidget(header(header_text)) + hlayout.addWidget(help_widget) + hlayout.setStretch(0, 1) + + vlayout = QVBoxLayout() + vlayout.addLayout(hlayout) + vlayout.addWidget(horizontal_line()) + return vlayout def get_unit(method: tuple, relative: bool = False) -> str: """Get the unit for plot axis naming. @@ -62,9 +82,10 @@ def get_unit(method: tuple, relative: bool = False) -> str: # Special namedtuple for the LCAResults TabWidget. Tabs = namedtuple( - "tabs", ("inventory", "results", "ef", "process", "sankey", "mc", "gsa") + "tabs", ("inventory", "results", "ef", "process", "ft", "sankey", "mc", "gsa") ) Relativity = namedtuple("relativity", ("relative", "absolute")) +TotalMenu = namedtuple("total_menu", ("range", "score")) ExportTable = namedtuple("export_table", ("label", "copy", "csv", "excel")) ExportPlot = namedtuple("export_plot", ("label", "png", "svg")) PlotTableCheck = namedtuple("plot_table_space", ("plot", "table", "invert")) @@ -121,6 +142,7 @@ def __init__(self, data: dict, parent=None): results=LCAResultsTab(self), ef=ElementaryFlowContributionTab(self), process=ProcessContributionsTab(self), + ft=FirstTierContributionsTab(self.cs_name, parent=self), sankey=SankeyNavigatorWidget(self.cs_name, parent=self), mc=MonteCarloTab( self @@ -132,6 +154,7 @@ def __init__(self, data: dict, parent=None): results="LCA Results", ef="EF Contributions", process="Process Contributions", + ft="FT Contributions", sankey="Sankey", mc="Monte Carlo", gsa="Sensitivity Analysis", @@ -174,6 +197,11 @@ def generate_content_on_click(self, index): if not self.tabs.sankey.has_sankey: log.info("Generating Sankey Tab") self.tabs.sankey.new_sankey() + elif index == self.indexOf(self.tabs.ft): + if not self.tabs.ft.has_been_opened: + log.info("Generating First Tier results") + self.tabs.ft.has_been_opened = True + self.tabs.ft.update_tab() @QtCore.Slot(name="lciaScenarioExport") def generate_lcia_scenario_csv(self): @@ -217,6 +245,9 @@ class NewAnalysisTab(BaseRightTab): def 
__init__(self, parent=None): super().__init__(parent) + + self.help_button: Optional[QToolBar] = None + self.parent = parent self.has_scenarios = self.parent.has_scenarios @@ -226,6 +257,8 @@ def __init__(self, parent=None): self.plot_table: Optional[PlotTableCheck] = None self.relativity: Optional[Relativity] = None self.relative: Optional[bool] = None + self.total_menu: Optional[TotalMenu] = None + self.total_range: Optional[bool] = None self.export_plot: Optional[ExportPlot] = None self.export_table: Optional[ExportTable] = None @@ -267,6 +300,11 @@ def build_main_space(self, invertable: bool = False) -> QScrollArea: row.addWidget(self.relativity.relative) row.addWidget(self.relativity.absolute) self.relativity.relative.toggled.connect(self.relativity_check) + if self.total_menu: + row.addWidget(vertical_line()) + row.addWidget(self.total_menu.range) + row.addWidget(self.total_menu.score) + self.total_menu.range.toggled.connect(self.total_check) row.addStretch() # Assemble Table and Plot area @@ -300,6 +338,12 @@ def relativity_check(self, checked: bool): self.relative = checked self.update_tab() + @QtCore.Slot(bool, name="isTotalToggled") + def total_check(self, checked: bool): + """Check if the range or score option is selected.""" + self.total_range = checked + self.update_tab() + def get_scenario_labels(self) -> List[str]: """Get scenario labels if scenarios are used.""" return self.parent.mlca.scenario_names if self.has_scenarios else [] @@ -330,11 +374,11 @@ def update_combobox(box: QComboBox, labels: List[str]) -> None: def update_tab(self): """Update the plot and table if they are present.""" - if self.plot: + if self.plot and self.plot.isVisible: self.update_plot() - if self.table: + if self.table and self.table.isVisible: self.update_table() - if self.plot and self.table: + if self.plot and self.plot.isVisible and self.table and self.table.isVisible: self.space_check() def update_table(self, *args, **kwargs): @@ -891,6 +935,27 @@ def 
__init__(self, parent, **kwargs): self.relativity.absolute.setToolTip( "Show absolute values (compare magnitudes of each contribution)" ) + self.relativity_group = QButtonGroup(self) + self.relativity_group.addButton(self.relativity.relative) + self.relativity_group.addButton(self.relativity.absolute) + + self.total_menu = TotalMenu( + QRadioButton("Range"), + QRadioButton("Score"), + ) + self.total_menu.range.setChecked(True) + self.total_range = True + self.total_menu.range.setToolTip( + "Show the contribution relative to the total range of results.\n" + "e.g. total negative results is -2 and total positive results is 10, then range is 12 (-2 * -1 + 10)" + ) + self.total_menu.score.setToolTip( + "Show the contributions relative to the total impact score.\n" + "e.g. total negative results is -2 and total positive results is 10, then score is 8 (-2 + 10)" + ) + self.total_group = QButtonGroup(self) + self.total_group.addButton(self.total_menu.range) + self.total_group.addButton(self.total_menu.score) self.df = None self.plot = ContributionPlot() @@ -899,6 +964,26 @@ def __init__(self, parent, **kwargs): self.has_method, self.has_func = False, False self.unit = None + self.has_been_opened = False + + # set-up the help button + self.explain_text = """ +
 There are three ways of doing Contribution Analysis in Activity Browser: 
- Elementary Flow (EF) Contributions
+- Process Contributions
+- First Tier (FT) Contributions
+ + Detailed information on the different approaches provided in this wiki page about the different approaches. + +You can manipulate the results in many ways with Activity Browser, read more on this wiki page + about manipulating results. + """ + + self.help_button = QToolBar(self) + self.help_button.addAction( + qicons.question, "Left click for help on Contribution Analysis Functions", self.explanation + ) + def set_filename(self, optional_fields: dict = None): """Given a dictionary of fields, put together a usable filename for the plot and table.""" optional = optional_fields or {} @@ -909,6 +994,7 @@ def set_filename(self, optional_fields: dict = None): optional.get("functional_unit"), self.unit, ) + filename = "_".join((str(x) for x in fields if x is not None)) self.plot.plot_name, self.table.table_name = filename, filename @@ -981,12 +1067,9 @@ def set_combobox_changes(self): # gather the combobox values method = self.parent.method_dict[self.combobox_menu.method.currentText()] functional_unit = self.combobox_menu.func.currentText() - scenario = self.combobox_menu.scenario.currentIndex() + scenario = max(self.combobox_menu.scenario.currentIndex(), 0) # set scenario 0 if not initiated yet aggregator = self.combobox_menu.agg.currentText() - # catch uninitiated scenario combobox - if scenario < 0: - scenario = 0 # set aggregator to None if unwanted if aggregator == "none": aggregator = None @@ -1026,8 +1109,17 @@ def connect_signals(self): def update_tab(self): """Update the tab.""" + QApplication.setOverrideCursor(QtCore.Qt.WaitCursor) self.set_combobox_changes() + + if self.cutoff_menu.limit_type == "percent": + self.total_menu.range.setEnabled(True) + self.total_menu.score.setEnabled(True) + else: + self.total_menu.range.setEnabled(False) + self.total_menu.score.setEnabled(False) super().update_tab() + QApplication.restoreOverrideCursor() def update_dataframe(self, *args, **kwargs): """Update the underlying dataframe. 
@@ -1036,7 +1128,7 @@ def update_dataframe(self, *args, **kwargs): raise NotImplementedError def update_table(self): - super().update_table(self.df) + super().update_table(self.df, unit=self.unit) def update_plot(self): """Update the plot.""" @@ -1075,7 +1167,8 @@ class ElementaryFlowContributionTab(ContributionTab): def __init__(self, parent=None): super().__init__(parent) - self.layout.addLayout(get_header_layout("Elementary Flow Contributions")) + header = get_header_layout_w_help("Elementary Flow Contributions", self.help_button) + self.layout.addLayout(header) self.layout.addWidget(self.cutoff_menu) self.layout.addWidget(horizontal_line()) combobox = self.build_combobox(has_method=True, has_func=True) @@ -1101,7 +1194,8 @@ def update_dataframe(self, *args, **kwargs): **kwargs, limit=self.cutoff_menu.cutoff_value, limit_type=self.cutoff_menu.limit_type, - normalize=self.relative + normalize=self.relative, + total_range=self.total_range, ) @@ -1127,7 +1221,8 @@ class ProcessContributionsTab(ContributionTab): def __init__(self, parent=None): super().__init__(parent) - self.layout.addLayout(get_header_layout("Process Contributions")) + header = get_header_layout_w_help("Process Contributions", self.help_button) + self.layout.addLayout(header) self.layout.addWidget(self.cutoff_menu) self.layout.addWidget(horizontal_line()) combobox = self.build_combobox(has_method=True, has_func=True) @@ -1155,8 +1250,392 @@ def update_dataframe(self, *args, **kwargs): **kwargs, limit=self.cutoff_menu.cutoff_value, limit_type=self.cutoff_menu.limit_type, - normalize=self.relative + normalize=self.relative, + total_range=self.total_range, + ) + + +class FirstTierContributionsTab(ContributionTab): + """Class for the 'First Tier Contributions' sub-tab. + + This tab allows for analysis of first-tier (product) contributions. + The direct impact (from biosphere exchanges from the FU) + and cumulative impacts from all exchange inputs to the FU (first level) are calculated. + + e.g. 
the direct emissions from steel production and the cumulative impact for all electricity input + into that activity. This works on the basis of input products and their total (cumulative) impact, scaled to + how much of that product is needed in the FU. + + Example questions that can be answered by this tab: + What is the contribution of electricity (product) to reference flow XXX? + Which input product contributes the most to impact category YYY? + What products contribute most to reference flow ZZZ? + + Shows: + Compare options button to change between 'Reference Flows' and 'Impact Categories' + 'Impact Category'/'Reference Flow' chooser with aggregation method + Plot/Table on/off and Relative/Absolute options for data + Plot/Table + Export options + """ + + def __init__(self, cs_name, parent=None): + super().__init__(parent) + + self.cache = {"totals": {}} # We cache the calculated data, as it can take some time to generate. + # We cache the individual calculation results, as they are re-used in multiple views + # e.g. FU1 x method1 x scenario1 + # may be seen in both 'Reference Flows' and 'Impact Categories', just with different axes. 
+ # we also cache totals, not for calculation speed, but to be able to easily convert for relative results + self.caching = True # set to False to disable caching for debug + + header = get_header_layout_w_help("First Tier Contributions", self.help_button) + self.layout.addLayout(header) + self.layout.addWidget(self.cutoff_menu) + self.layout.addWidget(horizontal_line()) + combobox = self.build_combobox(has_method=True, has_func=True) + self.layout.addLayout(combobox) + self.layout.addWidget(horizontal_line()) + self.layout.addWidget(self.build_main_space()) + self.layout.addLayout(self.build_export(True, True)) + + # get relevant data from calculation setup + self.cs = cs_name + func_units = bd.calculation_setups[self.cs]["inv"] + self.func_keys = [list(fu.keys())[0] for fu in func_units] # extract a list of keys from the functional units + self.func_units = [ + {bd.get_activity(k): v for k, v in fu.items()} + for fu in func_units + ] + self.methods = bd.calculation_setups[self.cs]["ia"] + + self.contribution_fn = "First Tier contributions" + self.switches.configure(self.has_func, self.has_method) + self.connect_signals() + self.toggle_comparisons(self.switches.indexes.func) + + def update_tab(self): + """Update the tab.""" + if self.has_been_opened: + super().update_tab() + + def build_combobox( + self, has_method: bool = True, has_func: bool = False + ) -> QHBoxLayout: + self.combobox_menu.agg.addItems( + self.parent.contributions.DEFAULT_ACT_AGGREGATES ) + return super().build_combobox(has_method, has_func) + + def get_data(self, compare) -> List[list]: + """Get the data for analysis, either from self.cache or from calculation.""" + def try_cache(): + """Get data from cache if exists, otherwise return none.""" + if self.caching: + return self.cache.get(cache_key, None) + + def calculate(): + """Shorthand for getting calculation results.""" + return self.calculate_contributions(demand, demand_key, demand_index, + method=method, method_index=method_index, + 
scenario_lca=self.has_scenarios, scenario_index=scenario_index, + ) + + # get the right data + if self.has_scenarios: + # get the scenario index, if it is -1 (none selected), then use index first index (0) + scenario_index = max(self.combobox_menu.scenario.currentIndex(), 0) + else: + scenario_index = None + method_index = self.combobox_menu.method.currentIndex() + method = self.methods[method_index] + demand_index = self.combobox_menu.func.currentIndex() + demand = self.func_units[demand_index] + demand_key = self.func_keys[demand_index] + + all_data = [] + if compare == "Reference Flows": + # run the analysis for every reference flow + for demand_index, demand in enumerate(self.func_units): + demand_key = self.func_keys[demand_index] + cache_key = (demand_index, method_index, scenario_index) + # get data from cache if exists, otherwise calculate + if data := try_cache(): + all_data.append([demand_key, data]) + continue + + data = calculate() + if self.caching: + self.cache[cache_key] = data + all_data.append([demand_key, data]) + elif compare == "Impact Categories": + # run the analysis for every method + for method_index, method in enumerate(self.methods): + cache_key = (demand_index, method_index, scenario_index) + + # get data from cache if exists, otherwise calculate + if data := try_cache(): + all_data.append([method, data]) + continue + + data = calculate() + if self.caching: + self.cache[cache_key] = data + all_data.append([method, data]) + elif compare == "Scenarios": + # run the analysis for every scenario + for scenario_index in range(self.combobox_menu.scenario.count()): + scenario = self.combobox_menu.scenario.itemText(scenario_index) + cache_key = (demand_index, method_index, scenario_index) + + # get data from cache if exists, otherwise calculate + if data := try_cache(): + all_data.append([scenario, data]) + continue + + data = calculate() + if self.caching: + self.cache[cache_key] = data + all_data.append([scenario, data]) + + return all_data + + 
def calculate_contributions(self, demand, demand_key, demand_index,
                            method, method_index: int = None,
                            scenario_lca: bool = False, scenario_index: int = None) -> dict:
    """Retrieve relevant activity data and calculate first tier contributions.

    The total score is read from the MLCA results that were already calculated. Each
    technosphere input of `demand_key` is then recalculated on its own with the shared
    LCA object; what is left of the total after subtracting those scores is assigned to
    `demand_key` itself.

    format of the returned dict:
    {"Total": score, input key: score of that input, ..., demand_key: remainder}
    """

    def get_default_demands() -> dict:
        """Get the inputs to calculate contributions from the activity."""
        activity = bd.get_activity(demand_key)
        # find scale from production amount and demand amount
        scale = demand[demand_key] / list(activity.production())[0].amount

        demands = {}
        # get exchange keys leading to this activity (skipping the activity's own output)
        for exch in activity.technosphere():
            key = exch.input.key
            if key == exch.output.key:
                continue
            # an activity can list the same input more than once; add those amounts up
            # rather than keeping only the last one
            demands[key] = demands.get(key, 0) + exch.amount * scale
        return demands

    def get_scenario_demands() -> dict:
        """Get the inputs to calculate contributions from the scenario matrix."""
        # get exchange keys leading to this activity
        technosphere = bd.get_activity(demand_key).technosphere()
        demand_idx = _lca.product_dict[demand_key]

        # find scale from production amount and demand amount
        scale = demand[demand_key] / _lca.technosphere_matrix[_lca.activity_dict[demand_key], demand_idx] * -1

        demands = {}
        for exch in technosphere:
            exch_idx = _lca.activity_dict[exch.input.key]
            if exch.input.key == exch.output.key:
                continue
            amount = _lca.technosphere_matrix[exch_idx, demand_idx] * scale
            # write all non-zero exchanges to demand dict
            if amount != 0:
                demands[exch.input.key] = amount
        return demands

    # reuse LCA object from original calculation to skip 1 LCA
    if scenario_lca:
        # get score from the already calculated result
        score = self.parent.mlca.lca_scores[demand_index, method_index, scenario_index]

        # get lca object from mlca class
        self.parent.mlca.current = scenario_index
        self.parent.mlca.update_matrices()
        _lca = self.parent.mlca.lca
        _lca.redo_lci(demand)
    else:
        # get score from the already calculated result
        score = self.parent.mlca.lca_scores[demand_index, method_index]

        # get lca object to calculate new results
        _lca = self.parent.mlca.lca

    # set the correct method
    _lca.switch_method(method)
    _lca.lcia_calculation()

    if score == 0:
        # no need to calculate contributions to '0' score
        # technically it could be that positive and negative score of same amount negate to 0, but highly unlikely.
        return {"Total": 0, demand_key: 0}

    data = {"Total": score}
    remainder = score  # contribution of demand_key

    new_demands = get_scenario_demands() if scenario_lca else get_default_demands()

    # iterate over all activities demand_key is connected to
    for key, amt in new_demands.items():
        # recalculate for this demand
        _lca.redo_lci({key: amt})
        _lca.redo_lcia()

        score = _lca.score
        if score != 0:
            # only store non-zero results
            data[key] = score
            remainder -= score  # subtract this from remainder

    data[demand_key] = remainder
    return data


def key_to_metadata(self, key: tuple) -> list:
    """Convert the key information to list with metadata.

    format:
    [reference product, activity name, location, unit, database]
    """
    fields = ["reference product", "name", "location", "unit"]
    # the database name is the first element of the key and is appended last
    return list(AB_metadata.get_metadata([key], fields).iloc[0]) + [key[0]]


def metadata_to_index(self, data: list) -> str:
    """Convert list to formatted index.

    format:
    reference product | activity name | location | unit | database

    Every element is converted with `str` first, so a non-string entry does not
    make the join fail.
    """
    return " | ".join(str(part) for part in data)
def data_to_df(self, all_data: List[list], compare: str) -> pd.DataFrame:
    """Convert the provided data into a dataframe.

    `all_data` holds one `[item, data]` entry per compared column, where `data` maps
    "Total" and contributor keys to scores. The result has one row per contributor
    (or per aggregation group), preceded by the rows 'Total', 'Rest (+)' and 'Rest (-)'.
    Contributions removed by the cut-off are set to NaN and end up in the rest rows.
    When `self.relative` is set, every data column is divided by its total.

    `all_data` itself is not modified. Raises ValueError if `compare` is not one of
    "Reference Flows", "Impact Categories" or "Scenarios".
    """
    d = {"index": [], "reference product": [], "name": [],
         "location": [], "unit": [], "database": []}
    meta_cols = set(d.keys())

    # get all the unique keys and the column name belonging to every entry
    unique_keys = set()
    columns = []  # (data, col_name) pairs, kept locally so the caller's list stays untouched
    for item, data in all_data:
        # item is a key, method or scenario depending on `compare`
        unique_keys.update(data.keys())
        if compare == "Reference Flows":
            col_name = self.metadata_to_index(self.key_to_metadata(item))
        elif compare == "Impact Categories":
            col_name = self.metadata_to_index(list(item))
        elif compare == "Scenarios":
            col_name = item
        else:
            raise ValueError(f"Unknown comparison type: {compare!r}")

        # already store the total under the formatted column name
        self.cache["totals"][col_name] = data["Total"]
        d[col_name] = []
        columns.append((data, col_name))

    self.unit = get_unit(self.parent.method_dict[self.combobox_menu.method.currentText()], self.relative)

    # convert to dict format to feed into dataframe
    for key in unique_keys:
        if key == "Total":
            continue
        # get metadata
        metadata = self.key_to_metadata(key)
        d["index"].append(self.metadata_to_index(metadata))
        d["reference product"].append(metadata[0])
        d["name"].append(metadata[1])
        d["location"].append(metadata[2])
        d["unit"].append(self.unit)
        d["database"].append(metadata[4])
        # check for each dataset if we have values, otherwise add np.nan
        # (missing and zero values both become np.nan)
        for data, col_name in columns:
            d[col_name].append(data.get(key) or np.nan)

    df = pd.DataFrame(d)
    data_cols = [col for col in df if col not in meta_cols]
    df = df.dropna(subset=data_cols, how="all")

    # now, apply aggregation
    group_on = self.combobox_menu.agg.currentText()
    if group_on != "none":
        df = df.groupby(by=group_on, as_index=False).sum()
        df["index"] = df[group_on]
        df = df[["index"] + data_cols]
        meta_cols = ["index"]

    # keep the values from before the cut-off to calculate the rest rows later
    all_contributions = df.copy()

    # now, apply cut-off
    limit_type = self.cutoff_menu.limit_type
    limit = self.cutoff_menu.cutoff_value

    # iterate over the columns to get contributors, then replace cutoff flows with nan
    row_positions = np.arange(df.shape[0])
    first_data_col = len(meta_cols)  # data columns are positioned after the metadata columns
    for col_num, col in enumerate(df[data_cols].T.values):
        # now, get total:
        if self.total_range:  # total is based on the range
            total = np.nansum(np.abs(col))
        else:  # total is based on the score
            total = np.nansum(col)

        col = np.nan_to_num(col)  # replace nan with 0
        cont = ca.sort_array(col, limit=limit, limit_type=limit_type, total=total)
        # write nans to values not present in cont (second column of cont holds the row positions)
        cut_off = ~np.isin(row_positions, cont[:, 1])
        df.iloc[cut_off, first_data_col + col_num] = np.nan

    # drop any rows not contributing to anything
    df = df.dropna(subset=data_cols, how="all")

    # sort by absolute mean, but only if there is something to sort
    if len(df) > 1:
        df["_sort_me_"] = df[data_cols].apply(lambda row: np.nanmean(np.abs(row)), axis=1)
        df.sort_values(by="_sort_me_", ascending=False, inplace=True)
        del df["_sort_me_"]

    # add the total and rest values
    total_and_rest = {col: [] for col in df}
    for col in df:
        if col == "index":
            total_and_rest[col].extend(["Total", "Rest (+)", "Rest (-)"])
        elif col in data_cols:
            total = self.cache["totals"][col]
            # positive and negative rest values: everything before the cut-off minus what is still shown
            all_vals = all_contributions[col].values
            kept_vals = df[col].values
            pos_rest = np.sum(all_vals[all_vals > 0]) - np.sum(kept_vals[kept_vals > 0])
            neg_rest = np.sum(all_vals[all_vals < 0]) - np.sum(kept_vals[kept_vals < 0])

            total_and_rest[col].extend([total, pos_rest, neg_rest])
        else:
            total_and_rest[col].extend(["", "", ""])

    # add the two df together
    df = pd.concat([pd.DataFrame(total_and_rest), df], axis=0)

    # normalize
    if self.relative:
        totals = [self.cache["totals"][col] for col in data_cols]
        df[data_cols] = df[data_cols] / totals

    return df


def update_dataframe(self, *args, **kwargs):
    """Retrieve the product contributions.

    Reads the selected comparison from `self.switches`, gets the data for it and
    returns it as a dataframe. `args` and `kwargs` are accepted but not used.
    """
    compare = self.switches.currentText()

    all_data = self.get_data(compare)
    df = self.data_to_df(all_data, compare)
    return df
def sync(self, df, unit="relative share"):
    """Store `df` as the model data and emit `updated`.

    If `df` has a 'unit' column, the first three rows ('Total' and the two 'Rest' rows)
    get an empty unit and all other rows get `unit`. Rows in which every numeric value
    is 0 are dropped. The work is done on a copy, so the dataframe passed in is left
    unchanged.
    """
    frame = df.copy()

    if "unit" in frame.columns:
        # never write more special-row entries than there are rows
        special_rows = min(3, len(frame))
        frame["unit"] = [""] * special_rows + [unit] * (len(frame) - special_rows)

    # drop any rows where all numbers are 0
    all_zero = (frame.select_dtypes(include=np.number) == 0).all(axis=1)
    self._dataframe = frame.loc[~all_zero]
    self.updated.emit()