diff --git a/doc/custom_metrics.rst b/doc/custom_metrics.rst
index 238ee77d..5f0219c4 100644
--- a/doc/custom_metrics.rst
+++ b/doc/custom_metrics.rst
@@ -12,15 +12,27 @@ Writing Custom Metric Functions
 First, let's look at how to write valid custom metric functions. A valid
 custom metric function must take two array-like positional arguments: the
 first being the true labels or scores, and the
-second being the predicted labels or scores. This function can also take three optional keyword arguments:
+second being the predicted labels or scores. This function can also take two optional keyword arguments:
 
 1. ``greater_is_better``: a boolean keyword argument that indicates whether a higher value of the metric indicates better performance (``True``) or vice versa (``False``). The default value is ``True``.
-2. ``needs_proba``: a boolean keyword argument that indicates whether the metric function requires probability estimates. The default value is ``False``.
-3. ``needs_threshold``: a boolean keyword argument that indicates whether the metric function takes a continuous decision certainty. The default value is ``False``.
+
+2. ``response_method`` : a string keyword argument that specifies the response method to use to get predictions from an estimator. Possible values are:
+
+   - ``"predict"`` : uses estimator's `predict() `__ method to get class labels
+   - ``"predict_proba"`` : uses estimator's `predict_proba() `__ method to get class probabilities
+   - ``"decision_function"`` : uses estimator's `decision_function() `__ method to get continuous decision function values
+   - If the value is a list or tuple of the above strings, it indicates that the scorer should use the first method in the list which is implemented by the estimator.
+   - If the value is ``None``, it is the same as ``"predict"``.
+
+   The default value for ``response_method`` is ``None``.
 
 Note that these keyword arguments are identical to the keyword arguments for the `sklearn.metrics.make_scorer() `_ function and serve the same purpose.
 
-In short, custom metric functions take two required positional arguments (order matters) and three optional keyword arguments. Here's a simple example of a custom metric function: F\ :sub:`β` with β=0.75 defined in a file called ``custom.py``.
+.. important::
+
+    Previous versions of SKLL offered the ``needs_proba`` and ``needs_threshold`` keyword arguments for custom metrics but these are now deprecated in scikit-learn and replaced by the ``response_method`` keyword argument. To replicate the behavior of ``needs_proba=True``, use ``response_method="predict_proba"`` instead and to replicate ``needs_threshold=True``, use ``response_method=("decision_function", "predict_proba")`` instead.
+
+In short, custom metric functions take two required positional arguments (order matters) and two optional keyword arguments. Here's a simple example of a custom metric function: F\ :sub:`β` with β=0.75 defined in a file called ``custom.py``.
 
 .. code-block:: python
    :caption: custom.py
@@ -30,7 +42,6 @@ In short, custom metric functions take two required positional arguments (order
     def f075(y_true, y_pred):
         return fbeta_score(y_true, y_pred, beta=0.75)
 
-
 Obviously, you may write much more complex functions that aren't directly
 available in scikit-learn. Once you have written your metric function, the
 next step is to use it in your SKLL experiment.
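
For reference, a custom metric relying on the new ``response_method`` keyword would look roughly like the sketch below. This is illustrative only: it assumes scikit-learn >= 1.4, where ``make_scorer()`` accepts ``response_method``, and the metric name is hypothetical (it mirrors ``fake_prob_metric`` in the test file further down in this diff).

    # custom.py -- illustrative sketch; the default value of response_method
    # in the signature is what SKLL picks up and forwards to make_scorer()
    from sklearn.metrics import average_precision_score

    def avg_precision(y_true, y_pred, response_method="predict_proba"):
        # the resulting scorer calls predict_proba() on the estimator and,
        # for binary problems, passes the positive-class probabilities in
        # as y_pred
        return average_precision_score(y_true, y_pred)
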
diff --git a/skll/learner/__init__.py b/skll/learner/__init__.py
index 12ca6a82..d77cf8b7 100644
--- a/skll/learner/__init__.py
+++ b/skll/learner/__init__.py
@@ -252,10 +252,12 @@ def __init__(
                     "produce probabilities, results will not be exactly "
                     "replicable when using SVC and probability mode."
                 )
+        elif issubclass(self._model_type, AdaBoostClassifier):
+            self._model_kwargs["algorithm"] = "SAMME"
+            self._model_kwargs["n_estimators"] = 500
         elif issubclass(
             self._model_type,
             (
-                AdaBoostClassifier,
                 AdaBoostRegressor,
                 BaggingClassifier,
                 BaggingRegressor,
@@ -268,6 +270,8 @@ def __init__(
             self._model_kwargs["n_estimators"] = 500
         elif issubclass(self._model_type, DummyClassifier):
             self._model_kwargs["strategy"] = "prior"
+        elif issubclass(self._model_type, (LinearSVC, LinearSVR)):
+            self._model_kwargs["dual"] = "auto"
         elif issubclass(self._model_type, SVR):
             self._model_kwargs["cache_size"] = 1000
             self._model_kwargs["gamma"] = "scale"
@@ -950,7 +954,7 @@ def train(
                 metrics_module = import_module("skll.metrics")
                 metric_func = getattr(metrics_module, "correlation")
                 _CUSTOM_METRICS[new_grid_objective] = make_scorer(
-                    metric_func, corr_type=grid_objective, needs_proba=True
+                    metric_func, corr_type=grid_objective, response_method="predict_proba"
                 )
                 grid_objective = new_grid_objective
 
diff --git a/skll/metrics.py b/skll/metrics.py
index f11c7c1f..68393cbc 100644
--- a/skll/metrics.py
+++ b/skll/metrics.py
@@ -298,7 +298,7 @@ def register_custom_metric(custom_metric_path: PathOrStr, custom_metric_name: st
     # extract any "special" keyword arguments from the metric function
     metric_func_parameters = signature(metric_func).parameters
     make_scorer_kwargs = {}
-    for make_scorer_kwarg in ["greater_is_better", "needs_proba", "needs_threshold"]:
+    for make_scorer_kwarg in ["greater_is_better", "response_method"]:
         if make_scorer_kwarg in metric_func_parameters:
             parameter = metric_func_parameters.get(make_scorer_kwarg)
             if parameter is not None:
diff --git a/tests/configs/test_send_warnings_to_log.template.cfg b/tests/configs/test_send_warnings_to_log.template.cfg
index f7601d9e..94782b0c 100644
--- a/tests/configs/test_send_warnings_to_log.template.cfg
+++ b/tests/configs/test_send_warnings_to_log.template.cfg
@@ -4,7 +4,7 @@ task=cross_validate
 
 [Input]
 featuresets=[["test_send_warnings_to_log"]]
-learners=["LinearSVC"]
+learners=["DummyClassifier"]
 suffix=.jsonlines
 num_cv_folds=2
 
diff --git a/tests/other/custom_metrics.py b/tests/other/custom_metrics.py
index c8f314f3..66a1699d 100644
--- a/tests/other/custom_metrics.py
+++ b/tests/other/custom_metrics.py
@@ -1,3 +1,4 @@
+"""Custom metrics for testing purposes."""
 from sklearn.metrics import (
     average_precision_score,
     f1_score,
@@ -8,31 +9,31 @@
 )
 
 
-def f075_macro(y_true, y_pred):
+def f075_macro(y_true, y_pred):  # noqa: D103
     return fbeta_score(y_true, y_pred, beta=0.75, average="macro")
 
 
-def ratio_of_ones(y_true, y_pred):
+def ratio_of_ones(y_true, y_pred):  # noqa: D103
     true_ones = [label for label in y_true if label == 1]
     pred_ones = [label for label in y_pred if label == 1]
     return len(pred_ones) / (len(true_ones) + len(pred_ones))
 
 
-def r2(y_true, y_pred):
+def r2(y_true, y_pred):  # noqa: D103
     return r2_score(y_true, y_pred)
 
 
-def one_minus_precision(y_true, y_pred, greater_is_better=False):
+def one_minus_precision(y_true, y_pred, greater_is_better=False):  # noqa: D103
     return 1 - precision_score(y_true, y_pred, average="binary")
 
 
-def one_minus_f1_macro(y_true, y_pred, greater_is_better=False):
+def one_minus_f1_macro(y_true, y_pred, greater_is_better=False):  # noqa: D103
     return 1 - f1_score(y_true, y_pred, average="macro")
 
 
-def fake_prob_metric(y_true, y_pred, needs_proba=True):
+def fake_prob_metric(y_true, y_pred, response_method="predict_proba"):  # noqa: D103
     return average_precision_score(y_true, y_pred)
 
 
-def fake_prob_metric_multiclass(y_true, y_pred, needs_proba=True):
+def fake_prob_metric_multiclass(y_true, y_pred, response_method="predict_proba"):  # noqa: D103
     return roc_auc_score(y_true, y_pred, average="macro", multi_class="ovo")
diff --git a/tests/test_classification.py b/tests/test_classification.py
index 16349093..c7f90d21 100644
--- a/tests/test_classification.py
+++ b/tests/test_classification.py
@@ -542,7 +542,7 @@ def test_sparse_predict(self):  # noqa: D103
             (0.45, 0.52),
             (0.52, 0.5),
             (0.48, 0.5),
-            (0.49, 0.5),
+            (0.5, 0.5),
             (0.54, 0.5),
             (0.43, 0),
             (0.53, 0.57),
@@ -814,8 +814,8 @@ def check_adaboost_predict(self, base_estimator, algorithm, expected_score)
     def test_adaboost_predict(self):  # noqa: D103
         for base_estimator_name, algorithm, expected_score in zip(
             ["MultinomialNB", "DecisionTreeClassifier", "SGDClassifier", "SVC"],
-            ["SAMME.R", "SAMME.R", "SAMME", "SAMME"],
-            [0.46, 0.52, 0.46, 0.5],
+            ["SAMME", "SAMME", "SAMME", "SAMME"],
+            [0.49, 0.52, 0.46, 0.5],
         ):
             yield self.check_adaboost_predict, base_estimator_name, algorithm, expected_score
 
diff --git a/tests/test_output.py b/tests/test_output.py
index ab746e62..fac7c8d2 100644
--- a/tests/test_output.py
+++ b/tests/test_output.py
@@ -1208,12 +1208,11 @@ def test_send_warnings_to_log(self):
         # Check experiment log output
        # The experiment log file should contain warnings related
        # to the use of sklearn
-        with open(output_dir / "test_send_warnings_to_log_LinearSVC.log") as f:
+        with open(output_dir / "test_send_warnings_to_log_DummyClassifier.log") as f:
             log_content = f.read()
         convergence_sklearn_warning_re = re.compile(
-            r"WARNING - [^\n]+sklearn.svm._base\.py:\d+: ConvergenceWarning:"
-            r"Liblinear failed to converge, increase the number of iterations"
-            r"\."
+            r"WARNING - [^\n]+sklearn.metrics._classification\.py:\d+: "
+            r"UndefinedMetricWarning:Precision is ill-defined and being set to 0.0"
         )
         assert convergence_sklearn_warning_re.search(log_content) is not None
 
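
To make the ``register_custom_metric()`` change above concrete, the keyword extraction now works roughly as sketched below. This is a simplified stand-in: ``build_scorer`` is a hypothetical helper name, and forwarding each parameter's default value to ``make_scorer()`` is an assumption about the surrounding, unchanged code; it also assumes scikit-learn >= 1.4, where ``make_scorer()`` accepts ``response_method``.

    from inspect import signature

    from sklearn.metrics import make_scorer

    def build_scorer(metric_func):
        # inspect the custom metric's signature for the "special" keyword
        # arguments and forward their default values to make_scorer()
        metric_func_parameters = signature(metric_func).parameters
        make_scorer_kwargs = {}
        for make_scorer_kwarg in ["greater_is_better", "response_method"]:
            parameter = metric_func_parameters.get(make_scorer_kwarg)
            if parameter is not None:
                make_scorer_kwargs[make_scorer_kwarg] = parameter.default
        return make_scorer(metric_func, **make_scorer_kwargs)

    # e.g., for fake_prob_metric() above this yields the equivalent of
    # make_scorer(fake_prob_metric, response_method="predict_proba")
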