From add21ca48fae417a3f2e4e3e778dca87783f6f07 Mon Sep 17 00:00:00 2001
From: Markus Bilz
Date: Mon, 4 Dec 2023 15:55:11 +0100
Subject: [PATCH] Make `y` optional parameter in `fit()` (#11)

---
 README.md                          |  6 ++----
 docs/index.md                      |  6 ++----
 sonar-project.properties           |  5 +++--
 src/tclf/classical_classifier.py   | 29 ++++++++++++++++-------------
 tests/test_classical_classifier.py | 15 +++++++--------
 5 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index d870350..3189344 100644
--- a/README.md
+++ b/README.md
@@ -40,18 +40,16 @@ X = pd.DataFrame(
     ],
     columns=["trade_price", "bid_ex", "ask_ex"],
 )
-y = pd.Series([1, 1, 1, 1, 1, 1])
 clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random")
-clf.fit(X, y)
+clf.fit(X)
 probs = clf.predict_proba(X)
-print(probs)
 ```
 
 Run your script with
 ```console
 python main.py
 ```
 
-In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
+In this example, input data is available as a pd.DataFrame with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
 The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level and `strategy="random"` specifies the fallback strategy for unclassified trades.
 The true label `y` is not used in classification and only for API consistency by convention.
diff --git a/docs/index.md b/docs/index.md
index d870350..3189344 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -40,18 +40,16 @@ X = pd.DataFrame(
     ],
     columns=["trade_price", "bid_ex", "ask_ex"],
 )
-y = pd.Series([1, 1, 1, 1, 1, 1])
 clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random")
-clf.fit(X, y)
+clf.fit(X)
 probs = clf.predict_proba(X)
-print(probs)
 ```
 
 Run your script with
 ```console
 python main.py
 ```
 
-In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
+In this example, input data is available as a pd.DataFrame with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
 The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level and `strategy="random"` specifies the fallback strategy for unclassified trades.
 The true label `y` is not used in classification and only for API consistency by convention.
diff --git a/sonar-project.properties b/sonar-project.properties
index b366bc0..6013169 100644
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -1,2 +1,3 @@
-sonar.sources=src\
-sonar.issue.ignore.multicriteria.e1.ruleKey=python:S117
+sonar.issue.ignore.multicriteria=S117
+sonar.issue.ignore.multicriteria.S117.ruleKey=python:S117
+sonar.issue.ignore.multicriteria.S117.resourceKey=*
diff --git a/src/tclf/classical_classifier.py b/src/tclf/classical_classifier.py
index e106f2a..bee7983 100644
--- a/src/tclf/classical_classifier.py
+++ b/src/tclf/classical_classifier.py
@@ -12,8 +12,7 @@
 import pandas as pd
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.utils import check_random_state
-from sklearn.utils.multiclass import check_classification_targets
-from sklearn.utils.validation import _check_sample_weight, check_is_fitted, check_X_y
+from sklearn.utils.validation import _check_sample_weight, check_is_fitted
 
 from tclf.types import ArrayLike, MatrixLike
 
@@ -79,9 +78,8 @@ def __init__(
         ...     ],
columns=["trade_price", "bid_ex", "ask_ex"], ... ) - >>> y = pd.Series([-1, 1, 1, -1, -1, 1]) >>> clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="const") - >>> clf.fit(X, y) + >>> clf.fit(X) ClassicalClassifier(layers=[('quote', 'ex')], strategy='const') >>> pred = clf.predict_proba(X) @@ -387,14 +385,14 @@ def _nan(self, subset: str) -> npt.NDArray: def fit( self, X: MatrixLike, - y: ArrayLike, + y: ArrayLike | None = None, sample_weight: npt.NDArray | None = None, ) -> ClassicalClassifier: """Fit the classifier. Args: X (MatrixLike): features - y (ArrayLike): ground truth (ignored) + y (ArrayLike | None, optional): ignored, present here for API consistency by convention. sample_weight (npt.NDArray | None, optional): Sample weights. Defaults to None. Raises: @@ -429,14 +427,13 @@ def fit( if isinstance(X, pd.DataFrame): self.columns_ = X.columns.tolist() - check_classification_targets(y) - - X, y = check_X_y( - X, y, multi_output=False, accept_sparse=False, force_all_finite=False + X = self._validate_data( + X, + dtype=[np.float64, np.float32], + accept_sparse=False, + force_all_finite=False, ) - # FIXME: make flexible if open-sourced - # self.classes_ = np.unique(y) self.classes_ = np.array([-1, 1]) # if no features are provided or inferred, use default @@ -467,6 +464,12 @@ def predict(self, X: MatrixLike) -> npt.NDArray: npt.NDArray: Predicted traget values for X. """ check_is_fitted(self) + X = self._validate_data( + X, + dtype=[np.float64, np.float32], + accept_sparse=False, + force_all_finite=False, + ) rs = check_random_state(self.random_state) @@ -514,7 +517,7 @@ def predict_proba(self, X: MatrixLike) -> npt.NDArray: mask = np.flatnonzero(preds) # get index of predicted class and one-hot encode it - indices = np.where(preds[mask, None] == self.classes_[None, :])[1] + indices = np.nonzero(preds[mask, None] == self.classes_[None, :])[1] n_classes = np.max(self.classes_) + 1 # overwrite defaults with one-hot encoded classes. diff --git a/tests/test_classical_classifier.py b/tests/test_classical_classifier.py index 448d89d..b92398e 100644 --- a/tests/test_classical_classifier.py +++ b/tests/test_classical_classifier.py @@ -24,7 +24,6 @@ def setup(self) -> None: self.x_train = pd.DataFrame( [[1, 1], [1, 1], [1, 1], [1, 1]], columns=["ask_best", "bid_best"] ) - self.y_train = pd.Series([1, 1, -1, -1]) self.x_test = pd.DataFrame( [[1, 2], [3, 4], [1, 2], [3, 4]], columns=["ask_best", "bid_best"] ) @@ -32,7 +31,7 @@ def setup(self) -> None: self.clf = ClassicalClassifier( layers=[("nan", "ex")], random_state=7, - ).fit(self.x_train, self.y_train) + ).fit(self.x_train) def test_random_state(self) -> None: """Test, if random state is correctly set. 
@@ -42,13 +41,13 @@ def test_random_state(self) -> None:
         first_classifier = ClassicalClassifier(
             layers=[("nan", "ex")],
             random_state=50,
-        ).fit(self.x_train, self.y_train)
+        ).fit(self.x_train)
         first_y_pred = first_classifier.predict(self.x_test)
 
         second_classifier = ClassicalClassifier(
             layers=[("nan", "ex")],
             random_state=50,
-        ).fit(self.x_train, self.y_train)
+        ).fit(self.x_train)
         second_y_pred = second_classifier.predict(self.x_test)
 
         assert (first_y_pred == second_y_pred).all()
@@ -61,7 +60,7 @@ def test_fit(self) -> None:
         fitted_classifier = ClassicalClassifier(
             layers=[("nan", "ex")],
             random_state=42,
-        ).fit(self.x_train, self.y_train)
+        ).fit(self.x_train)
         assert check_is_fitted(fitted_classifier) is None
 
     def test_strategy_const(self) -> None:
@@ -72,7 +71,7 @@ def test_strategy_const(self) -> None:
         """
         fitted_classifier = ClassicalClassifier(
             layers=[("nan", "ex")], strategy="const"
-        ).fit(self.x_train, self.y_train)
+        ).fit(self.x_train)
         assert (fitted_classifier.predict_proba(self.x_test) == 0.5).all()
 
     def test_invalid_func(self) -> None:
@@ -86,7 +85,7 @@ def test_invalid_func(self) -> None:
             random_state=42,
         )
         with pytest.raises(ValueError, match=r"Unknown function string"):
-            classifier.fit(self.x_train, self.y_train)
+            classifier.fit(self.x_train)
 
     def test_invalid_col_length(self) -> None:
         """Test, if only valid column length can be passed.
@@ -100,7 +99,7 @@ def test_invalid_col_length(self) -> None:
             layers=[("tick", "all")], random_state=42, features=["one"]
         )
         with pytest.raises(ValueError, match=r"Expected"):
-            classifier.fit(self.x_train.values, self.y_train.values)
+            classifier.fit(self.x_train.values)
 
     def test_override(self) -> None:
         """Test, if classifier does not override valid results from layer one.
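
For reference, a minimal usage sketch of the calling convention after this patch. It is not part of the diff: it mirrors the README example above, and the row values, the `y` series, and the installed-package import path are illustrative assumptions.

```python
# Sketch of the fit() call before/after this patch (illustrative values only).
import pandas as pd

from tclf.classical_classifier import ClassicalClassifier

# Feature frame following the naming conventions used in the README example;
# the numeric values are made up for illustration.
X = pd.DataFrame(
    [
        [1.5, 1.0, 3.0],
        [2.5, 1.0, 3.0],
        [1.5, 3.0, 4.0],
        [2.5, 3.0, 4.0],
    ],
    columns=["trade_price", "bid_ex", "ask_ex"],
)

clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random")

# After this patch, y can be omitted entirely ...
clf.fit(X)

# ... or still be passed; it is ignored and kept only for API consistency.
clf.fit(X, y=pd.Series([1, -1, 1, -1]))

probs = clf.predict_proba(X)  # probability estimates for the classes [-1, 1]
```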