Commit

Merge branch 'main' into gaugup/AddLinkCheck

gaugup authored Sep 20, 2023
2 parents f03a370 + 8277afe commit 634d600
Showing 88 changed files with 17,114 additions and 3,567 deletions.
2 changes: 2 additions & 0 deletions .flake8
@@ -0,0 +1,2 @@
[flake8]
max-line-length = 127
31 changes: 31 additions & 0 deletions .github/workflows/notebook-linting.yml
@@ -0,0 +1,31 @@
# This workflow will lint jupyter notebooks with flake8-nb.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Notebook linting

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
schedule:
- cron: '30 5 * * *'

jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8-nb==0.4.0
- name: Lint notebooks with flake8_nb
run: |
# stop the build if there are flake8 errors in notebooks
flake8_nb docs/source/notebooks/ --statistics --max-line-length=127
46 changes: 46 additions & 0 deletions .github/workflows/notebook-tests.yml
@@ -0,0 +1,46 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Notebook tests

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
schedule:
- cron: '30 5 * * *'

jobs:
build:

runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
os: [ubuntu-latest, macos-latest]
exclude:
- os: macos-latest
python-version: "3.7"
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }} ${{ matrix.os }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: |
python -m pip install --upgrade pip
- name: Install core dependencies
run: |
pip install -r requirements.txt
- name: Install deep learning dependencies
run: |
pip install -r requirements-deeplearning.txt
- name: Install test dependencies
run: |
pip install -r requirements-test.txt
- name: Test with pytest
run: |
# pytest
pytest tests/ -m "notebook_tests" --durations=10 --doctest-modules --junitxml=junit/test-results.xml --cov=dice_ml --cov-report=xml --cov-report=html
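The new workflow above runs only the tests tagged with the notebook_tests pytest marker, while the existing package workflow further below now excludes them via -m "not notebook_tests". As a rough illustration of how such a marker-based split works — the registration snippet and test name here are assumptions for illustration, not files from this repository — a marked test looks like:

# Register the marker (e.g. in pytest.ini or setup.cfg) so -m filters can select it:
#   [pytest]
#   markers =
#       notebook_tests: tests that execute the example notebooks

import pytest

@pytest.mark.notebook_tests
def test_example_notebook_runs():
    # Selected by `pytest tests/ -m "notebook_tests"` (the notebook workflow)
    # and skipped by `pytest tests/ -m "not notebook_tests"` (the package workflow).
    assert True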
11 changes: 4 additions & 7 deletions .github/workflows/python-linting.yml
@@ -1,4 +1,4 @@
# This workflow will lint python code with flake8 and flake8-nb.
# This workflow will lint python code with flake8.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python linting
@@ -17,10 +17,10 @@ jobs:

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.7
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
python-version: 3.7
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
@@ -37,7 +37,4 @@ jobs:
# Check for cyclometric complexity for specific files where this metric has been
# reduced to ten and below
flake8 dice_ml/data_interfaces/ --count --max-complexity=10 --max-line-length=127
- name: Lint notebooks with flake8_nb
run: |
# stop the build if there are flake8 errors in notebooks
flake8_nb docs/source/notebooks/ --statistics --max-line-length=127
.github/workflows/python-package.yml
@@ -17,8 +17,11 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [3.7, 3.8, 3.9, "3.10"]
os: [ubuntu-latest, macos-latest]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
os: [ubuntu-latest, macos-latest, windows-latest]
exclude:
- os: macos-latest
python-version: "3.7"
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }} ${{ matrix.os }}
@@ -40,14 +43,14 @@ jobs:
- name: Test with pytest
run: |
# pytest
pytest tests/ --durations=10 --doctest-modules --junitxml=junit/test-results.xml --cov=dice_ml --cov-report=xml --cov-report=html
pytest tests/ -m "not notebook_tests" --durations=10 --doctest-modules --junitxml=junit/test-results.xml --cov=dice_ml --cov-report=xml --cov-report=html
- name: Publish Unit Test Results
uses: EnricoMi/publish-unit-test-result-action/composite@v1
if: ${{ (matrix.python-version == '3.8') && (matrix.os == 'ubuntu-latest') }}
with:
files: junit/test-results.xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v3
if: ${{ (matrix.python-version == '3.8') && (matrix.os == 'ubuntu-latest') }}
with:
token: ${{ secrets.CODECOV_TOKEN }}
14 changes: 10 additions & 4 deletions README.rst
@@ -1,11 +1,17 @@
|BuildStatus|_ |PyPiVersion|_ |PythonSupport|_ |CondaVersion|_
|BuildStatusTests|_ |BuildStatusNotebooks|_ |PyPiVersion|_ |PythonSupport|_ |Downloads|_ |CondaVersion|_

.. |BuildStatus| image:: https://github.com/interpretml/dice/workflows/Python%20package/badge.svg
.. _BuildStatus: https://github.com/interpretml/dice/actions?query=workflow%3A%22Python+package%22
.. |BuildStatusTests| image:: https://github.com/interpretml/DiCE/actions/workflows/python-package.yml/badge.svg?branch=main
.. _BuildStatusTests: https://github.com/interpretml/DiCE/actions/workflows/python-package.yml?query=workflow%3A%22Python+package%22

.. |BuildStatusNotebooks| image:: https://github.com/interpretml/DiCE/actions/workflows/notebook-tests.yml/badge.svg?branch=main
.. _BuildStatusNotebooks: https://github.com/interpretml/DiCE/actions/workflows/notebook-tests.yml?query=workflow%3A%22Notebook+tests%22

.. |PyPiVersion| image:: https://img.shields.io/pypi/v/dice-ml
.. _PyPiVersion: https://pypi.org/project/dice-ml/

.. |Downloads| image:: https://static.pepy.tech/personalized-badge/dice-ml?period=total&units=international_system&left_color=grey&right_color=orange&left_text=Downloads
.. _Downloads: https://pepy.tech/project/dice-ml

.. |PythonSupport| image:: https://img.shields.io/pypi/pyversions/dice-ml
.. _PythonSupport: https://pypi.org/project/dice-ml/

@@ -35,7 +41,7 @@ Diverse Counterfactual Explanations (DiCE) for ML
Explanations are critical for machine learning, especially as machine learning-based systems are being used to inform decisions in societally critical domains such as finance, healthcare, education, and criminal justice.
However, most explanation methods depend on an approximation of the ML model to
create an interpretable explanation. For example,
consider a person who applied for a loan and was rejected by the loan distribution algorithm of a financial company. Typically, the company may provide an explanation on why the loan was rejected, for example, due to "poor credit history". However, such an explanation does not help the person decide *what they do should next* to improve their chances of being approved in the future. Critically, the most important feature may not be enough to flip the decision of the algorithm, and in practice, may not even be changeable such as gender and race.
consider a person who applied for a loan and was rejected by the loan distribution algorithm of a financial company. Typically, the company may provide an explanation on why the loan was rejected, for example, due to "poor credit history". However, such an explanation does not help the person decide *what they should do next* to improve their chances of being approved in the future. Critically, the most important feature may not be enough to flip the decision of the algorithm, and in practice, may not even be changeable such as gender and race.


DiCE implements `counterfactual (CF) explanations <https://arxiv.org/abs/1711.00399>`_ that provide this information by showing feature-perturbed versions of the same person who would have received the loan, e.g., ``you would have received the loan if your income was higher by $10,000``. In other words, it provides "what-if" explanations for model output and can be a useful complement to other explanation methods, both for end-users and model developers.
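To make the explanation style concrete, here is a minimal, self-contained usage sketch of the dice_ml API as commonly documented; the tiny synthetic dataset and column names are placeholders, and the exact calls should be checked against the DiCE documentation rather than read as part of this commit.

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

import dice_ml

# Placeholder data: two continuous features and a binary outcome.
df = pd.DataFrame({
    "age": [22, 35, 47, 51, 29, 60, 33, 44],
    "hours_per_week": [20, 40, 45, 50, 38, 30, 42, 48],
    "income": [0, 0, 1, 1, 0, 1, 0, 1],
})

d = dice_ml.Data(dataframe=df, continuous_features=["age", "hours_per_week"], outcome_name="income")
clf = RandomForestClassifier(random_state=0).fit(df[["age", "hours_per_week"]], df["income"])
m = dice_ml.Model(model=clf, backend="sklearn")

exp = dice_ml.Dice(d, m, method="random")
result = exp.generate_counterfactuals(
    df[["age", "hours_per_week"]].iloc[0:1], total_CFs=2, desired_class="opposite")
result.visualize_as_dataframe(show_only_changes=True)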
2 changes: 1 addition & 1 deletion dice_ml/counterfactual_explanations.py
@@ -2,11 +2,11 @@
import os

import jsonschema
from raiutils.exceptions import UserConfigValidationException

from dice_ml.constants import _SchemaVersions
from dice_ml.diverse_counterfactuals import (CounterfactualExamples,
_DiverseCFV2SchemaConstants)
from dice_ml.utils.exception import UserConfigValidationException


class _CommonSchemaConstants:
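Several hunks in this commit move the UserConfigValidationException import from dice_ml.utils.exception to the raiutils package. Because the class name is unchanged, code that raises or catches it keeps the same shape; a small sketch with a hypothetical validator (for illustration only, not a function from dice_ml):

from raiutils.exceptions import UserConfigValidationException

def validate_total_cfs(total_cfs):
    # Hypothetical validator, not part of the DiCE codebase.
    if total_cfs <= 0:
        raise UserConfigValidationException("total_CFs should be a positive integer")

try:
    validate_total_cfs(0)
except UserConfigValidationException as exc:
    print(f"Invalid configuration: {exc}")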
5 changes: 3 additions & 2 deletions dice_ml/data_interfaces/base_data_interface.py
@@ -2,8 +2,9 @@

from abc import ABC, abstractmethod

from dice_ml.utils.exception import (SystemException,
UserConfigValidationException)
from raiutils.exceptions import UserConfigValidationException

from dice_ml.utils.exception import SystemException


class _BaseData(ABC):
2 changes: 1 addition & 1 deletion dice_ml/data_interfaces/private_data_interface.py
@@ -379,7 +379,7 @@ def get_ohe_min_max_normalized_data(self, query_instance):
a dataframe, a list, or a list of dicts"""
query_instance = self.prepare_query_instance(query_instance)
ohe_base_df = self.prepare_df_for_ohe_encoding()
temp = ohe_base_df.append(query_instance, ignore_index=True, sort=False)
temp = pd.concat([ohe_base_df, query_instance], ignore_index=True, sort=False)
temp = self.one_hot_encode_data(temp)
temp = temp.tail(query_instance.shape[0]).reset_index(drop=True)
# returns a pandas dataframe
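The change above replaces DataFrame.append, which pandas deprecated and later removed, with pd.concat; the module is assumed to already import pandas as pd. A standalone sketch of the equivalent pattern with made-up data:

import pandas as pd

ohe_base_df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
query_instance = pd.DataFrame({"a": [3], "b": ["z"]})

# Old (removed in pandas 2.0): ohe_base_df.append(query_instance, ignore_index=True, sort=False)
temp = pd.concat([ohe_base_df, query_instance], ignore_index=True, sort=False)
print(temp)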
10 changes: 6 additions & 4 deletions dice_ml/data_interfaces/public_data_interface.py
@@ -6,11 +6,11 @@

import numpy as np
import pandas as pd
from raiutils.exceptions import UserConfigValidationException
from sklearn.preprocessing import LabelEncoder

from dice_ml.data_interfaces.base_data_interface import _BaseData
from dice_ml.utils.exception import (SystemException,
UserConfigValidationException)
from dice_ml.utils.exception import SystemException


class PublicData(_BaseData):
@@ -138,9 +138,11 @@ def get_features_range(self, permitted_range_input=None, features_dict=None):

def get_data_type(self, col):
"""Infers data type of a continuous feature from the training data."""
if (self.data_df[col].dtype == np.int64) or (self.data_df[col].dtype == np.int32):
if (self.data_df[col].dtype == np.int64) or (self.data_df[col].dtype == np.int32) or \
(self.data_df[col].dtype == np.int16) or (self.data_df[col].dtype == np.int8):
return 'int'
elif (self.data_df[col].dtype == np.float64) or (self.data_df[col].dtype == np.float32):
elif (self.data_df[col].dtype == np.float64) or (self.data_df[col].dtype == np.float32) or \
(self.data_df[col].dtype == np.float16):
return 'float'
else:
raise ValueError("Unknown data type of feature %s: must be int or float" % col)
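The widened dtype checks above enumerate the common NumPy integer and float widths explicitly. An alternative sketch using NumPy's dtype hierarchy — shown only as a comparison, not the code this commit adds — covers every width in one test:

import numpy as np
import pandas as pd

def get_data_type(series):
    # np.issubdtype walks the NumPy type hierarchy, so int8/16/32/64 and
    # float16/32/64 all match without listing each width.
    if np.issubdtype(series.dtype, np.integer):
        return "int"
    if np.issubdtype(series.dtype, np.floating):
        return "float"
    raise ValueError("Unknown data type of feature %s: must be int or float" % series.name)

print(get_data_type(pd.Series([1, 2, 3], dtype=np.int16)))     # int
print(get_data_type(pd.Series([1.0, 2.5], dtype=np.float32)))  # float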
3 changes: 2 additions & 1 deletion dice_ml/dice.py
@@ -2,10 +2,11 @@
frameworks such as Tensorflow or PyTorch or sklearn, and different methods
such as RandomSampling, DiCEKD or DiCEGenetic"""

from raiutils.exceptions import UserConfigValidationException

from dice_ml.constants import BackEndTypes, SamplingStrategy
from dice_ml.data_interfaces.private_data_interface import PrivateData
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
from dice_ml.utils.exception import UserConfigValidationException


class Dice(ExplainerBase):
15 changes: 11 additions & 4 deletions dice_ml/diverse_counterfactuals.py
@@ -1,5 +1,6 @@
import copy
import json
import math

import pandas as pd

@@ -134,10 +135,16 @@ def display_df(self, df, show_only_changes):
org = self.test_instance_df.values.tolist()[0]
for ix in range(df.shape[0]):
for jx in range(len(org)):
if newdf[ix][jx] == org[jx]:
newdf[ix][jx] = '-'
if not isinstance(newdf[ix][jx], str):
if math.isclose(newdf[ix][jx], org[jx], rel_tol=abs(org[jx]/10000)):
newdf[ix][jx] = '-'
else:
newdf[ix][jx] = str(newdf[ix][jx])
else:
newdf[ix][jx] = str(newdf[ix][jx])
if newdf[ix][jx] == org[jx]:
newdf[ix][jx] = '-'
else:
newdf[ix][jx] = str(newdf[ix][jx])
display(pd.DataFrame(newdf, columns=df.columns, index=df.index)) # works only in Jupyter notebook

def visualize_as_list(self, display_sparse_df=True, show_only_changes=False):
@@ -157,7 +164,7 @@ def print_list(self, li, show_only_changes):
org = self.test_instance_df.values.tolist()[0]
for ix in range(len(newli)):
for jx in range(len(newli[ix])):
if newli[ix][jx] == org[jx]:
if math.isclose(newli[ix][jx], org[jx], rel_tol=abs(org[jx]/10000)):
newli[ix][jx] = '-'
print(newli[ix])

4 changes: 2 additions & 2 deletions dice_ml/explainer_interfaces/dice_genetic.py
@@ -8,11 +8,11 @@

import numpy as np
import pandas as pd
from raiutils.exceptions import UserConfigValidationException

from dice_ml import diverse_counterfactuals as exp
from dice_ml.constants import ModelTypes
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
from dice_ml.utils.exception import UserConfigValidationException


class DiceGenetic(ExplainerBase):
@@ -507,7 +507,7 @@ def find_counterfactuals(self, query_instance, desired_range, desired_class,

# converting to dataframe
query_instance_df = self.label_decode(query_instance)
query_instance_df[self.data_interface.outcome_name] = self.test_pred
query_instance_df[self.data_interface.outcome_name] = self.get_model_output_from_scores(self.test_pred)
self.final_cfs_df = self.label_decode_cfs(self.final_cfs)
self.final_cfs_df_sparse = copy.deepcopy(self.final_cfs_df)

8 changes: 4 additions & 4 deletions dice_ml/explainer_interfaces/dice_pytorch.py
@@ -508,7 +508,7 @@ def find_counterfactuals(self, query_instance, desired_class, optimizer, learnin
temp_cfs_stored = self.round_off_cfs(assign=False)
test_preds_stored = [self.predict_fn(cf) for cf in temp_cfs_stored]

if((self.target_cf_class == 0 and all(i <= self.stopping_threshold for i in test_preds_stored)) or
if ((self.target_cf_class == 0 and all(i <= self.stopping_threshold for i in test_preds_stored)) or
(self.target_cf_class == 1 and all(i >= self.stopping_threshold for i in test_preds_stored))):
avg_preds_dist = np.mean([abs(pred[0]-self.stopping_threshold) for pred in test_preds_stored])
if avg_preds_dist < self.min_dist_from_threshold[loop_ix]:
@@ -533,7 +533,7 @@ def find_counterfactuals(self, query_instance, desired_class, optimizer, learnin
self.cfs_preds = [self.predict_fn(cfs) for cfs in self.final_cfs]

# update final_cfs from backed up CFs if valid CFs are not found
if((self.target_cf_class == 0 and any(i[0] > self.stopping_threshold for i in self.cfs_preds)) or
if ((self.target_cf_class == 0 and any(i[0] > self.stopping_threshold for i in self.cfs_preds)) or
(self.target_cf_class == 1 and any(i[0] < self.stopping_threshold for i in self.cfs_preds))):
for loop_ix in range(loop_find_CFs):
if self.min_dist_from_threshold[loop_ix] != 100:
@@ -580,7 +580,7 @@ def find_counterfactuals(self, query_instance, desired_class, optimizer, learnin
final_cfs_df_sparse = None

m, s = divmod(self.elapsed, 60)
if((self.target_cf_class == 0 and all(i <= self.stopping_threshold for i in self.cfs_preds)) or
if ((self.target_cf_class == 0 and all(i <= self.stopping_threshold for i in self.cfs_preds)) or
(self.target_cf_class == 1 and all(i >= self.stopping_threshold for i in self.cfs_preds))):
self.total_CFs_found = max(loop_find_CFs, self.total_CFs)
valid_ix = [ix for ix in range(max(loop_find_CFs, self.total_CFs))] # indexes of valid CFs
@@ -590,7 +590,7 @@ def find_counterfactuals(self, query_instance, desired_class, optimizer, learnin
self.total_CFs_found = 0
valid_ix = [] # indexes of valid CFs
for cf_ix, pred in enumerate(self.cfs_preds):
if((self.target_cf_class == 0 and pred[0][0] < self.stopping_threshold) or
if ((self.target_cf_class == 0 and pred[0][0] < self.stopping_threshold) or
(self.target_cf_class == 1 and pred[0][0] > self.stopping_threshold)):
self.total_CFs_found += 1
valid_ix.append(cf_ix)
8 changes: 4 additions & 4 deletions dice_ml/explainer_interfaces/dice_tensorflow1.py
@@ -609,7 +609,7 @@ def find_counterfactuals(self, query_instance, limit_steps_ls, desired_class="op
temp_cfs_stored = self.round_off_cfs(assign=False)
test_preds_stored = [self.predict_fn(cf) for cf in temp_cfs_stored]

if((self.target_cf_class[0][0] == 0 and all(i <= self.stopping_threshold for i in test_preds_stored)) or
if ((self.target_cf_class[0][0] == 0 and all(i <= self.stopping_threshold for i in test_preds_stored)) or
(self.target_cf_class[0][0] == 1 and all(i >= self.stopping_threshold for i in test_preds_stored))):
avg_preds_dist = np.mean([abs(pred-self.stopping_threshold) for pred in test_preds_stored])
if avg_preds_dist < self.min_dist_from_threshold[loop_ix]:
@@ -634,7 +634,7 @@ def find_counterfactuals(self, query_instance, limit_steps_ls, desired_class="op
self.cfs_preds = [self.predict_fn(cfs) for cfs in self.final_cfs]

# update final_cfs from backed up CFs if valid CFs are not found
if((self.target_cf_class == 0 and any(i[0] > self.stopping_threshold for i in self.cfs_preds)) or
if ((self.target_cf_class == 0 and any(i[0] > self.stopping_threshold for i in self.cfs_preds)) or
(self.target_cf_class == 1 and any(i[0] < self.stopping_threshold for i in self.cfs_preds))):
for loop_ix in range(loop_find_CFs):
if self.min_dist_from_threshold[loop_ix] != 100:
@@ -666,7 +666,7 @@ def find_counterfactuals(self, query_instance, limit_steps_ls, desired_class="op
final_cfs_df_sparse = None

m, s = divmod(self.elapsed, 60)
if((self.target_cf_class == 0 and all(i <= self.stopping_threshold for i in self.cfs_preds)) or
if ((self.target_cf_class == 0 and all(i <= self.stopping_threshold for i in self.cfs_preds)) or
(self.target_cf_class == 1 and all(i >= self.stopping_threshold for i in self.cfs_preds))):
self.total_CFs_found = max(loop_find_CFs, self.total_CFs)
valid_ix = [ix for ix in range(max(loop_find_CFs, self.total_CFs))] # indexes of valid CFs
@@ -676,7 +676,7 @@ def find_counterfactuals(self, query_instance, limit_steps_ls, desired_class="op
self.total_CFs_found = 0
valid_ix = [] # indexes of valid CFs
for cf_ix, pred in enumerate(self.cfs_preds):
if((self.target_cf_class == 0 and pred < self.stopping_threshold) or
if ((self.target_cf_class == 0 and pred < self.stopping_threshold) or
(self.target_cf_class == 1 and pred > self.stopping_threshold)):
self.total_CFs_found += 1
valid_ix.append(cf_ix)