Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Several CA fixes #1383

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 35 additions & 8 deletions activity_browser/bwutils/multilca.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import OrderedDict
from copy import deepcopy
from typing import Iterable, Optional, Union
from logging import getLogger

Expand Down Expand Up @@ -433,15 +434,30 @@ def _build_dict(
"""
topcontribution_dict = dict()
for fu_or_method, col in FU_M_index.items():

contribution_col = contributions[col, :]
total = contribution_col.sum()

top_contribution = ca.sort_array(
contributions[col, :], limit=limit, limit_type=limit_type
contribution_col, limit=limit, limit_type=limit_type, total=total
)

# split and calculate remaining rest sections for positive and negative part
pos_rest = (
np.sum(contribution_col[contribution_col > 0])
- np.sum(top_contribution[top_contribution[:, 0] > 0][:, 0])
)
neg_rest = (
np.sum(contribution_col[contribution_col < 0])
- np.sum(top_contribution[top_contribution[:, 0] < 0][:, 0])
)

cont_per = OrderedDict()
cont_per.update(
{
("Total", ""): contributions[col, :].sum(),
("Rest", ""): contributions[col, :].sum()
- top_contribution[:, 0].sum(),
("Total", ""): total,
("Rest (+)", ""): pos_rest,
("Rest (-)", ""): neg_rest,
}
)
for value, index in top_contribution:
Expand Down Expand Up @@ -544,12 +560,12 @@ def join_df_with_metadata(

if special_keys:
# replace index keys with labels
try: # first put Total and Rest to the first two positions in the dataframe
try: # first put Total, Rest (+) and Rest (-) to the first three positions in the dataframe
complete_index = special_keys + keys
joined = joined.reindex(complete_index, axis="index", fill_value=0.0)
except:
log.error(
"Could not put Total and Rest on positions 0 and 1 in the dataframe."
"Could not put 'Total', 'Rest (+)' and 'Rest (-)' on positions 0, 1 and 2 in the dataframe."
)
joined.index = cls.get_labels(joined.index, fields=x_fields)
return joined
Expand Down Expand Up @@ -583,7 +599,7 @@ def get_labelled_contribution_dict(
# If the cont_dict has tuples for keys, coerce df.columns into MultiIndex
if all(isinstance(k, tuple) for k in cont_dict.keys()):
df.columns = pd.MultiIndex.from_tuples(df.columns)
special_keys = [("Total", ""), ("Rest", "")]
special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")]

# replace all 0 values with NaN and drop all rows with only NaNs
# EXCEPT for the special keys
Expand All @@ -596,6 +612,17 @@ def get_labelled_contribution_dict(
)
df = df.loc[index]

# sort on absolute mean of a row
df_bot = deepcopy(df.iloc[3:, :])

func = lambda row: np.nanmean(np.abs(row))

df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1)
df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True)
del df_bot["_sort_me_"]

df = pd.concat([df.iloc[:3, :], df_bot], axis=0)

if not mask:
joined = self.join_df_with_metadata(
df, x_fields=x_fields, y_fields=y_fields, special_keys=special_keys
Expand All @@ -617,7 +644,7 @@ def adjust_table_unit(df: pd.DataFrame, method: Optional[tuple]) -> pd.DataFrame
"""Given a dataframe, adjust the unit of the table to either match the given method, or not exist."""
if "unit" not in df.columns:
return df
keys = df.index[~df["index"].isin({"Total", "Rest"})]
keys = df.index[~df["index"].isin({"Total", "Rest (+)", "Rest (-)"})]
unit = bd.Method(method).metadata.get("unit") if method else "unit"
df.loc[keys, "unit"] = unit
return df
Expand Down
28 changes: 25 additions & 3 deletions activity_browser/docs/wiki/LCA-Results.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ we call the _from_ part of the contributions (the EFs or activities above) _enti
There are several ways Activity Browser manipulates your results by default.
- The results are **sorted** so that the rows with the largest (absolute) average values are shown first.
- A `cut-off` of 5% is applied, this only shows results that contribute at least 5% to the total result,
all other entities are grouped into a `Rest` group.
all other entities are grouped into a `Rest (+)` or `Rest (-)` group.
- The contributions are _normalized_ to the impact of that reference flow, meaning they are shown as a percentage,
counting up to 100% for every item you compare.

Expand All @@ -84,7 +84,8 @@ The `Relative` mode shows contributions _from_ entities of _x_% or higher.
The `Top #` mode shows contributions from the _x_ entities that contribute the most (as absolute).
You can adjust the `Cut-off level` to change how many results you see.

All results that don't make the cut-off will be grouped into the `Rest` group.
All results that don't make the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups.
Each Rest group is only present when there are positive (for `Rest (+)`) or negative (for `Rest (-)`) numbers remaining for that group.

#### Compare
The `Compare` menu allows you to compare different dimensions of results.
Expand All @@ -107,7 +108,28 @@ You can disable one of them if you want to focus on one of them.

#### Relative and Absolute
Finally, you can choose between `Relative` and `Absolute` results.
The `Relative` results will sum to 100%, the `Absolute` results will sum to the impact score.
The `Relative` results will sum to 100% (the total score), the `Absolute` results will sum to the impact score.

### Positive and negative numbers in contribution results
It can happen in LCA that you get both positive and negative numbers in your contribution results.
Possible reasons for this include negative characterization factors, flows with negative numbers or the use of substitution flows.

When there are both positive and negative numbers in the result, Activity Browser will show a marker to indicate
where the total score is, and show positive and negative contributions to the impact separately.

Below is a simple example (with unrealistic values) to demonstrate this:

![CA example with positive and negative results](./assets/ca_positive_negative_example.png)

Other software (e.g. [Brightway2-Analyzer](https://github.com/brightway-lca/brightway2-analyzer))
may use a different 'total', meaning the contributions may look different.
For example, Brightway2-Analyzer uses the total of absolute values
(so the range of numbers from the lowest negative number to the highest positive number) as 100% of the score.

> [!IMPORTANT]
> Because Activity Browser uses the total score (the sum of all positive and negative contributions) as 100%,
> the positive results alone will sum to over 100% when there are also negative numbers; positive and negative results together still sum to 100%.
> Even single contributions may be over 100%.

## Sankey
The `Sankey` tab shows results from [graph traversal](https://docs.brightway.dev/projects/graphtools/en/latest/index.html).
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 24 additions & 2 deletions activity_browser/ui/figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,17 @@ def __init__(self):
self.plot_name = "Contributions"

def plot(self, df: pd.DataFrame, unit: str = None):
"""Plot a horizontal bar chart of the process contributions."""
"""Plot a horizontal stacked bar chart of contributions,
add 'total' marker if both positive and negative results are present."""
dfp = df.copy()
dfp.index = dfp["index"]
dfp.drop(
dfp.select_dtypes(["object"]), axis=1, inplace=True
) # get rid of all non-numeric columns (metadata)
if "Total" in dfp.index:
dfp.drop("Total", inplace=True)
# drop rows if all values are 0
dfp = dfp.loc[~(dfp == 0).all(axis=1)]

self.ax.clear()
canvas_width_inches, canvas_height_inches = self.get_canvas_size_in_inches()
Expand All @@ -204,9 +207,18 @@ def plot(self, df: pd.DataFrame, unit: str = None):
dfp.index = dfp.index.str.strip("_ \n\t")
dfp.columns = dfp.columns.str.strip("_ \n\t")

# set colormap to use
items = dfp.shape[0] # how many contribution items
# skip grey and black at start/end of cmap
cmap = plt.cm.nipy_spectral_r(np.linspace(0, 1, items + 2))[1:-1]
colors = {item: color for item, color in zip(dfp.index, cmap)}
# overwrite rest values to grey
colors["Rest (+)"] = [0.8, 0.8, 0.8, 1.]
colors["Rest (-)"] = [0.8, 0.8, 0.8, 1.]

dfp.T.plot.barh(
stacked=True,
cmap=plt.cm.nipy_spectral_r,
color=colors,
ax=self.ax,
legend=False if dfp.shape[0] >= self.MAX_LEGEND else True,
)
Expand All @@ -225,6 +237,16 @@ def plot(self, df: pd.DataFrame, unit: str = None):
self.ax.grid(which="major", axis="x", color="grey", linestyle="dashed")
self.ax.set_axisbelow(True) # puts gridlines behind bars

# total marker when both negative and positive results are present in a column
marker_size = max(min(150 / dfp.shape[1], 35), 10) # set marker size dyanmic between 10 - 35
for i, col in enumerate(dfp):
total = np.sum(dfp[col])
abs_total = np.sum(np.abs(dfp[col]))
if abs(total) != abs_total:
self.ax.plot(total, i,
markersize=marker_size, marker="d", fillstyle="left",
markerfacecolor="black", markerfacecoloralt="grey", markeredgecolor="white")

# TODO review: remove or enable

# refresh canvas
Expand Down
6 changes: 2 additions & 4 deletions activity_browser/ui/tables/models/lca_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ def sync(self, df):

class ContributionModel(PandasModel):
def sync(self, df):
self._dataframe = df.replace(np.nan, "", regex=True)
# drop the 'rest' row if empty
if self._dataframe.select_dtypes(include=np.number).iloc[1, :].sum() == 0:
self._dataframe.drop(labels=1, inplace=True)
# drop any rows where all numbers are 0
self._dataframe = df.loc[~(df.select_dtypes(include=np.number) == 0).all(axis=1)]
self.updated.emit()
Loading