Skip to content

Commit

Permalink
Make `y` an optional parameter in `fit` (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
KarelZe authored Dec 4, 2023
1 parent c915346 commit add21ca
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 31 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,16 @@ X = pd.DataFrame(
],
columns=["trade_price", "bid_ex", "ask_ex"],
)
y = pd.Series([1, 1, 1, 1, 1, 1])

clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random")
clf.fit(X, y)
clf.fit(X)
probs = clf.predict_proba(X)
print(probs)
```
Run your script with
```console
python main.py
```
In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
In this example, input data is available as a pd.DataFrame with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).

The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level, and `strategy="random"` specifies the fallback strategy for unclassified trades. The true label `y` is not used in classification; `fit` accepts it only for API consistency by convention.

Expand Down
6 changes: 2 additions & 4 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,16 @@ X = pd.DataFrame(
],
columns=["trade_price", "bid_ex", "ask_ex"],
)
y = pd.Series([1, 1, 1, 1, 1, 1])

clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="random")
clf.fit(X, y)
clf.fit(X)
probs = clf.predict_proba(X)
print(probs)
```
Run your script with
```console
python main.py
```
In this example, input data is available as a pd.DataFrame/Series with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).
In this example, input data is available as a pd.DataFrame with columns conforming to our [naming conventions](https://karelze.github.io/tclf/naming_conventions/).

The parameter `layers=[("quote", "ex")]` sets the quote rule at the exchange level, and `strategy="random"` specifies the fallback strategy for unclassified trades. The true label `y` is not used in classification; `fit` accepts it only for API consistency by convention.

Expand Down
5 changes: 3 additions & 2 deletions sonar-project.properties
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
sonar.sources=src\
sonar.issue.ignore.multicriteria.e1.ruleKey=python:S117
sonar.issue.ignore.multicriteria=S117
sonar.issue.ignore.multicriteria.S117.ruleKey=python:S117
sonar.issue.ignore.multicriteria.S117.resourceKey=*
29 changes: 16 additions & 13 deletions src/tclf/classical_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_random_state
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import _check_sample_weight, check_is_fitted, check_X_y
from sklearn.utils.validation import _check_sample_weight, check_is_fitted

from tclf.types import ArrayLike, MatrixLike

Expand Down Expand Up @@ -79,9 +78,8 @@ def __init__(
... ],
... columns=["trade_price", "bid_ex", "ask_ex"],
... )
>>> y = pd.Series([-1, 1, 1, -1, -1, 1])
>>> clf = ClassicalClassifier(layers=[("quote", "ex")], strategy="const")
>>> clf.fit(X, y)
>>> clf.fit(X)
ClassicalClassifier(layers=[('quote', 'ex')], strategy='const')
>>> pred = clf.predict_proba(X)
Expand Down Expand Up @@ -387,14 +385,14 @@ def _nan(self, subset: str) -> npt.NDArray:
def fit(
self,
X: MatrixLike,
y: ArrayLike,
y: ArrayLike | None = None,
sample_weight: npt.NDArray | None = None,
) -> ClassicalClassifier:
"""Fit the classifier.
Args:
X (MatrixLike): features
y (ArrayLike): ground truth (ignored)
y (ArrayLike | None, optional): ignored, present here for API consistency by convention.
sample_weight (npt.NDArray | None, optional): Sample weights. Defaults to None.
Raises:
Expand Down Expand Up @@ -429,14 +427,13 @@ def fit(
if isinstance(X, pd.DataFrame):
self.columns_ = X.columns.tolist()

check_classification_targets(y)

X, y = check_X_y(
X, y, multi_output=False, accept_sparse=False, force_all_finite=False
X = self._validate_data(
X,
dtype=[np.float64, np.float32],
accept_sparse=False,
force_all_finite=False,
)

# FIXME: make flexible if open-sourced
# self.classes_ = np.unique(y)
self.classes_ = np.array([-1, 1])

# if no features are provided or inferred, use default
Expand Down Expand Up @@ -467,6 +464,12 @@ def predict(self, X: MatrixLike) -> npt.NDArray:
npt.NDArray: Predicted target values for X.
"""
check_is_fitted(self)
X = self._validate_data(
X,
dtype=[np.float64, np.float32],
accept_sparse=False,
force_all_finite=False,
)

rs = check_random_state(self.random_state)

Expand Down Expand Up @@ -514,7 +517,7 @@ def predict_proba(self, X: MatrixLike) -> npt.NDArray:
mask = np.flatnonzero(preds)

# get index of predicted class and one-hot encode it
indices = np.where(preds[mask, None] == self.classes_[None, :])[1]
indices = np.nonzero(preds[mask, None] == self.classes_[None, :])[1]
n_classes = np.max(self.classes_) + 1

# overwrite defaults with one-hot encoded classes.
Expand Down
15 changes: 7 additions & 8 deletions tests/test_classical_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,14 @@ def setup(self) -> None:
self.x_train = pd.DataFrame(
[[1, 1], [1, 1], [1, 1], [1, 1]], columns=["ask_best", "bid_best"]
)
self.y_train = pd.Series([1, 1, -1, -1])
self.x_test = pd.DataFrame(
[[1, 2], [3, 4], [1, 2], [3, 4]], columns=["ask_best", "bid_best"]
)
self.y_test = pd.Series([1, -1, 1, -1])
self.clf = ClassicalClassifier(
layers=[("nan", "ex")],
random_state=7,
).fit(self.x_train, self.y_train)
).fit(self.x_train)

def test_random_state(self) -> None:
"""Test, if random state is correctly set.
Expand All @@ -42,13 +41,13 @@ def test_random_state(self) -> None:
first_classifier = ClassicalClassifier(
layers=[("nan", "ex")],
random_state=50,
).fit(self.x_train, self.y_train)
).fit(self.x_train)
first_y_pred = first_classifier.predict(self.x_test)

second_classifier = ClassicalClassifier(
layers=[("nan", "ex")],
random_state=50,
).fit(self.x_train, self.y_train)
).fit(self.x_train)
second_y_pred = second_classifier.predict(self.x_test)

assert (first_y_pred == second_y_pred).all()
Expand All @@ -61,7 +60,7 @@ def test_fit(self) -> None:
fitted_classifier = ClassicalClassifier(
layers=[("nan", "ex")],
random_state=42,
).fit(self.x_train, self.y_train)
).fit(self.x_train)
assert check_is_fitted(fitted_classifier) is None

def test_strategy_const(self) -> None:
Expand All @@ -72,7 +71,7 @@ def test_strategy_const(self) -> None:
"""
fitted_classifier = ClassicalClassifier(
layers=[("nan", "ex")], strategy="const"
).fit(self.x_train, self.y_train)
).fit(self.x_train)
assert (fitted_classifier.predict_proba(self.x_test) == 0.5).all()

def test_invalid_func(self) -> None:
Expand All @@ -86,7 +85,7 @@ def test_invalid_func(self) -> None:
random_state=42,
)
with pytest.raises(ValueError, match=r"Unknown function string"):
classifier.fit(self.x_train, self.y_train)
classifier.fit(self.x_train)

def test_invalid_col_length(self) -> None:
"""Test, if only valid column length can be passed.
Expand All @@ -100,7 +99,7 @@ def test_invalid_col_length(self) -> None:
layers=[("tick", "all")], random_state=42, features=["one"]
)
with pytest.raises(ValueError, match=r"Expected"):
classifier.fit(self.x_train.values, self.y_train.values)
classifier.fit(self.x_train.values)

def test_override(self) -> None:
"""Test, if classifier does not override valid results from layer one.
Expand Down

0 comments on commit add21ca

Please sign in to comment.