Skip to content

Commit

Permalink
Added test for multitarget classification in ensemble, changed test of single target classification ensemble.
Browse files Browse the repository at this point in the history
  • Loading branch information
kaueltzen committed Nov 22, 2024
1 parent e5c84bc commit a72ff13
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 6 deletions.
10 changes: 5 additions & 5 deletions modnet/models/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def predict(
return_unc: bool = False,
return_prob: bool = False,
remap_out_of_bounds: bool = True,
class_voting: str = "soft",
voting_type: str = "soft",
) -> pd.DataFrame:
"""Predict the target values for the passed MODData.
Expand All @@ -177,7 +177,7 @@ def predict(
class OR only return the most probable class.
return_unc: whether to return a second dataframe containing the uncertainties
remap_out_of_bounds: whether to remap out-of-bounds values to the nearest bound.
class_voting: If classification task and return_prob is False, determines
voting_type: If classification task and return_prob is False, determines
if soft or hard ensemble voting is performed.
Returns:
Expand All @@ -189,7 +189,7 @@ class OR only return the most probable class.
if (
not return_prob
and max(self.num_classes.values()) >= 2
and class_voting == "soft"
and voting_type == "soft"
):
return_prob_comput = True

Expand All @@ -205,7 +205,7 @@ class OR only return the most probable class.
p_columns = p.columns
if max(self.num_classes.values()) == 0 or return_prob:
p_mean = np.array(all_predictions).mean(axis=0)
elif class_voting == "soft":
elif voting_type == "soft":
p_columns, p_mean = [], []
for prop in set(["_".join(s.split("_")[:-2]) for s in p.columns]):
prop_ids = [
Expand All @@ -222,7 +222,7 @@ class OR only return the most probable class.
arr=np.array(all_predictions),
)

p_std = np.array(all_predictions).std(axis=0) # TODO adapt for soft voting
p_std = np.array(all_predictions).std(axis=0)
df_mean = pd.DataFrame(p_mean, index=p.index, columns=p_columns)
df_std = pd.DataFrame(p_std, index=p.index, columns=p.columns)

Expand Down
48 changes: 47 additions & 1 deletion modnet/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,53 @@ def is_metal(egap):

model.fit(data, epochs=2)
model.predict(data)
model.predict(data, return_unc=True)
model.predict(data, return_prob=False, voting_type="soft", return_unc=True)
model.predict(data, return_prob=False, voting_type="hard", return_unc=True)
model.predict(data, return_prob=True, return_unc=True)
assert not np.isnan(model.evaluate(data))


def test_train_small_bootstrap_multi_target_classif(small_moddata, tf_session):
    """Train a bootstrapped ensemble on two binary classification targets.

    Two class labels are derived from existing target columns of the
    ``small_moddata`` fixture: ``is_metal`` (1 where ``egap`` equals 0) and
    ``eform_cl`` (1 where ``eform`` is positive). A three-model ensemble is
    fitted for two epochs, ``predict`` is then exercised with probabilities,
    soft voting and hard voting (always requesting uncertainties), and
    finally ``evaluate`` must not return NaN.
    """
    from modnet.models import EnsembleMODNetModel

    data = small_moddata

    # Bypass feature selection: use every ElementProperty column as "optimal".
    data.optimal_features = [
        name
        for name in data.df_featurized.columns
        if name.startswith("ElementProperty")
    ]

    def is_metal(egap):
        # Zero band gap -> class 1, anything else -> class 0.
        return 1 if egap == 0 else 0

    def eform_cl(eform):
        # Positive formation energy -> class 1, otherwise class 0.
        return 1 if eform > 0 else 0

    targets = data.df_targets
    targets["is_metal"] = targets["egap"].apply(is_metal)
    targets["eform_cl"] = targets["eform"].apply(eform_cl)

    class_targets = ("eform_cl", "is_metal")
    model = EnsembleMODNetModel(
        [[[name] for name in class_targets]],
        weights={name: 1 for name in class_targets},
        num_neurons=[[16], [8], [8], [4]],
        num_classes={name: 2 for name in class_targets},
        n_feat=10,
        n_models=3,
        bootstrap=True,
    )

    model.fit(data, epochs=2)

    # Class probabilities first, then both voting schemes for hard labels.
    model.predict(data, return_prob=True, return_unc=True)
    for voting in ("soft", "hard"):
        model.predict(data, return_prob=False, voting_type=voting, return_unc=True)

    assert not np.isnan(model.evaluate(data))


Expand Down

0 comments on commit a72ff13

Please sign in to comment.