Added test for multitarget classification in ensemble, changes test of single target classification ensemble.
ppdebreuck · Nov 22, 2024 · a72ff13 · a72ff13
1 parent e5c84bc
commit a72ff13
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 6 deletions.
diff --git a/modnet/models/ensemble.py b/modnet/models/ensemble.py
@@ -166,7 +166,7 @@ def predict(
         return_unc: bool = False,
         return_prob: bool = False,
         remap_out_of_bounds: bool = True,
-        class_voting: str = "soft",
+        voting_type: str = "soft",
     ) -> pd.DataFrame:
         """Predict the target values for the passed MODData.
 
@@ -177,7 +177,7 @@ def predict(
                 class OR only return the most probable class.
             return_unc: whether to return a second dataframe containing the uncertainties
             remap_out_of_bounds: whether to remap out-of-bounds values to the nearest bound.
-            class_voting: If classification task and return_prob is False, determines
+            voting_type: If classification task and return_prob is False, determines
                 if soft or hard ensemble voting is performed.
 
         Returns:
@@ -189,7 +189,7 @@ class OR only return the most probable class.
         if (
             not return_prob
             and max(self.num_classes.values()) >= 2
-            and class_voting == "soft"
+            and voting_type == "soft"
         ):
             return_prob_comput = True
 
@@ -205,7 +205,7 @@ class OR only return the most probable class.
         p_columns = p.columns
         if max(self.num_classes.values()) == 0 or return_prob:
             p_mean = np.array(all_predictions).mean(axis=0)
-        elif class_voting == "soft":
+        elif voting_type == "soft":
             p_columns, p_mean = [], []
             for prop in set(["_".join(s.split("_")[:-2]) for s in p.columns]):
                 prop_ids = [
@@ -222,7 +222,7 @@ class OR only return the most probable class.
                 arr=np.array(all_predictions),
             )
 
-        p_std = np.array(all_predictions).std(axis=0)  # TODO adapt for soft voting
+        p_std = np.array(all_predictions).std(axis=0)
         df_mean = pd.DataFrame(p_mean, index=p.index, columns=p_columns)
         df_std = pd.DataFrame(p_std, index=p.index, columns=p.columns)
 

diff --git a/modnet/tests/test_model.py b/modnet/tests/test_model.py
@@ -273,7 +273,53 @@ def is_metal(egap):
 
     model.fit(data, epochs=2)
     model.predict(data)
-    model.predict(data, return_unc=True)
+    model.predict(data, return_prob=False, voting_type="soft", return_unc=True)
+    model.predict(data, return_prob=False, voting_type="hard", return_unc=True)
+    model.predict(data, return_prob=True, return_unc=True)
+    assert not np.isnan(model.evaluate(data))
+
+
+def test_train_small_bootstrap_multi_target_classif(small_moddata, tf_session):
+    """Tests the multi target classification training."""
+    from modnet.models import EnsembleMODNetModel
+
+    data = small_moddata
+    # set 'optimal' features manually
+    data.optimal_features = [
+        col for col in data.df_featurized.columns if col.startswith("ElementProperty")
+    ]
+
+    def is_metal(egap):
+        if egap == 0:
+            return 1
+        else:
+            return 0
+
+    def eform_cl(eform):
+        if eform > 0:
+            return 1
+        else:
+            return 0
+
+    data.df_targets["is_metal"] = data.df_targets["egap"].apply(is_metal)
+    data.df_targets["eform_cl"] = data.df_targets["eform"].apply(eform_cl)
+    model = EnsembleMODNetModel(
+        [[["eform_cl"], ["is_metal"]]],
+        weights={
+            "eform_cl": 1,
+            "is_metal": 1,
+        },
+        num_neurons=[[16], [8], [8], [4]],
+        num_classes={"eform_cl": 2, "is_metal": 2},
+        n_feat=10,
+        n_models=3,
+        bootstrap=True,
+    )
+
+    model.fit(data, epochs=2)
+    model.predict(data, return_prob=True, return_unc=True)
+    model.predict(data, return_prob=False, voting_type="soft", return_unc=True)
+    model.predict(data, return_prob=False, voting_type="hard", return_unc=True)
     assert not np.isnan(model.evaluate(data))