Commit: Add and apply ruff linting

KarelZe committed Oct 1, 2023
1 parent 6085597 commit 89300ff
Showing 44 changed files with 603 additions and 677 deletions.
45 changes: 10 additions & 35 deletions .pre-commit-config.yaml
@@ -24,52 +24,27 @@ repos:
       - id: debug-statements
       - id: end-of-file-fixer
       - id: mixed-line-ending
-  - repo: https://github.com/PyCQA/autoflake
-    rev: v1.7.7
-    hooks:
-      - id: autoflake
-        args:
-          # - "--check"
-          - "--ignore-init-module-imports"
-          - "--remove-all-unused-imports"
-          - "--remove-unused-variables"
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
-    hooks:
-      - id: isort
-        name: isort (python)
-        # args:
-        # - "--check-only"
+      - id: trailing-whitespace
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v0.991
     hooks:
       - id: mypy
         # yaml requires additional stubs.
         # Similar to: https://stackoverflow.com/a/73603491/5755604
         additional_dependencies: ['types-PyYAML']
   - repo: https://github.com/pre-commit/pygrep-hooks
     rev: v1.9.0
     hooks:
       - id: python-use-type-annotations
-  - repo: https://github.com/psf/black.git
-    rev: 22.10.0
-    hooks:
-      - id: black
-        # args:
-        # - "--check"
-        language_version: python3
-        exclude: ^(tests\/hooks-abort-render\/hooks|docs)
-  - repo: https://github.com/pycqa/flake8
-    rev: 5.0.4
-    exclude: ^(docs)
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.0.284
     hooks:
-      - id: flake8
-        additional_dependencies:
-          - flake8-absolute-import
-          - flake8-black
-          - flake8-docstrings
-          - flake8-bugbear
-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.2.2
+      - id: ruff
+        args:
+          - --fix
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.6.0
     hooks:
-      - id: pyupgrade
+      - id: nbstripout
+        exclude: "^(references|reports)"
42 changes: 42 additions & 0 deletions pyproject.toml
@@ -55,6 +55,7 @@ dev = [
"pre-commit",
"pytest",
"pytest-cov",
"ruff",
"sphinx",
"tox",
]
@@ -100,3 +101,44 @@ omit = [
"debug_*.py",
"tests/*",
]

[tool.ruff]
# See rules: https://beta.ruff.rs/docs/rules/
select = [
"A", # flake8-builtins
"B", # flake8-bugbear
"C", # flake8-comprehensions
"D", # pydocstyle
"E", # pycodestyle errors
"F", # pyflakes
"I", # isort
"N", # pep8-naming
"NPY", # numpy
"PD", # pandas-vet
"PT", # pytest
"PTH", # flake8-use-pathlib
"PGH", # pygrep
"RET", # return
"RUF", # ruff-specific rules
"UP", # pyupgrade
"S", # flake8-bandit
"SIM", # flake8-simplify
"W", # pycodestyle warnings
]

include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]

ignore = [
"E501", # line too long, handled by black
"N803", # argument name should be lowercase
"N806", # variable name should be lowercase
"C901", # too complex
]

[tool.ruff.isort]
known-first-party = ["otc"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]

[tool.ruff.per-file-ignores]
"__init__.py" = ["D104", "F401"] # disable missing docstrings in __init__, unused imports
"tests/*" = ["S101"] # Use of `assert` detected
3 changes: 1 addition & 2 deletions src/otc/__init__.py
@@ -1,5 +1,4 @@
"""
Support for custom code.
"""Support for custom code.
See `readme.md` for instructions on how to run.
"""
9 changes: 3 additions & 6 deletions src/otc/config/config.py
@@ -1,5 +1,4 @@
"""
Holds configuration for folders, dbs, and wandb configuration.
"""Holds configuration for folders, dbs, and wandb configuration.
See also `prod.env`.
"""
@@ -10,8 +9,7 @@


 class Settings(BaseSettings):
-    """
-    Specifies settings.
+    """Specifies settings.

     Mainly W&B, GCS and Heroku.
     """
@@ -26,8 +24,7 @@ class Settings(BaseSettings):
MODEL_DIR_REMOTE: Path

     class Config:
-        """
-        Specifies configuration.
+        """Specifies configuration.

         Filename is given by "prod.env". Keys are case-sensitive.
         """
3 changes: 1 addition & 2 deletions src/otc/data/__init__.py
@@ -1,5 +1,4 @@
"""
Support for data.
"""Support for data.
See `readme.md` for instructions on how to run.
"""
33 changes: 17 additions & 16 deletions src/otc/data/dataloader.py
@@ -1,5 +1,4 @@
"""
A fast dataloader-like object to load batches of tabular data sets.
"""A fast dataloader-like object to load batches of tabular data sets.
Adapted from here:
https://discuss.pytorch.org/t/dataloader-much-slower-than-manual-batching/27014/6
@@ -12,8 +11,7 @@


 class TabDataLoader:
-    """
-    PyTorch Implementation of a dataloader for tabular data.
+    """PyTorch Implementation of a dataloader for tabular data.

     Due to a chunk-wise reading of several rows at once it is preferred
     over the standard dataloader that reads row-wise.
@@ -27,12 +25,12 @@ def __init__(
         device: str = "cpu",
         **kwargs: Any,
     ):
-        """
-        TabDataLoader.
+        """TabDataLoader.

         Tensors can be None e. g., if there is no categorical data.

         Args:
+        ----
             batch_size (int, optional): size of batch. Defaults to 4096.
             shuffle (bool, optional): shuffle data. Defaults to False.
             device (str, optional): device where. Defaults to "cpu".
@@ -57,10 +55,10 @@ def __init__(
self.n_batches = n_batches

     def __iter__(self) -> TabDataLoader:
-        """
-        Return itself.
+        """Return itself.

-        Returns:
+        Returns
+        -------
             TabDataLoader: TabDataLoader
         """
if self.shuffle:
@@ -71,13 +69,16 @@ def __iter__(self) -> TabDataLoader:
return self

     def __next__(self) -> tuple[torch.Tensor | None, ...]:
-        """
-        Generate next batch with size of 'batch_size'.
+        """Generate next batch with size of 'batch_size'.

         Batches can be underfull.

-        Raises:
+        Raises
+        ------
             StopIteration: stopping criterion.

-        Returns:
+        Returns
+        -------
             Tuple[torch.Tensor | None, torch.Tensor, torch.Tensor]: (X_cat), X_cont,
                 weight, y
         """
@@ -96,10 +97,10 @@ def __next__(self) -> tuple[torch.Tensor | None, ...]:
return tuple(mixed_batch)

     def __len__(self) -> int:
-        """
-        Get number of full and partial batches in data set.
+        """Get number of full and partial batches in data set.

-        Returns:
+        Returns
+        -------
             int: number of batches.
         """
return self.n_batches
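
Nearly every remaining hunk in this commit follows the same pattern, driven by the pydocstyle (D) rules selected above: the summary moves up onto the line with the opening quotes (D212), and section headers gain dashed underlines (D407). A minimal sketch of the target layout (a hypothetical helper, not from the repository):

def batch_count(total: int, batch_size: int) -> int:
    """Get number of full and partial batches.

    Args:
    ----
        total (int): number of samples.
        batch_size (int): size of batch.

    Returns
    -------
        int: number of batches.
    """
    return -(-total // batch_size)  # ceiling division without importing math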
29 changes: 15 additions & 14 deletions src/otc/data/dataset.py
@@ -1,5 +1,4 @@
"""
Implementation of a dataset for tabular data.
"""Implementation of a dataset for tabular data.
Supports both categorical and continous data.
"""
@@ -16,6 +15,7 @@ class TabDataset(Dataset):
"""PyTorch Dataset for tabular data.
Args:
----
Dataset (Dataset): dataset
"""

@@ -28,13 +28,13 @@ def __init__(
         cat_features: list[str] | None = None,
         cat_unique_counts: tuple[int, ...] | None = None,
     ):
-        """
-        Tabular data set holding data for the model.
+        """Tabular data set holding data for the model.

         Data set is inspired by CatBoost's Pool class:
         https://catboost.ai/en/docs/concepts/python-reference_pool

         Args:
+        ----
             x (pd.DataFrame | npt.ndarray): feature matrix
             y (pd.Series | npt.ndarray): target
             weight (pd.Series | npt.ndarray | None, optional): weights of samples. If
@@ -48,7 +48,7 @@ def __init__(
             cat_unique_counts (tuple[int, ...] | None, optional): Number of categories
                 per categorical feature. Defaults to None.
         """
-        self._cat_unique_counts = () if not cat_unique_counts else cat_unique_counts
+        self._cat_unique_counts = cat_unique_counts if cat_unique_counts else ()
feature_names = [] if feature_names is None else feature_names
# infer feature names from dataframe.
if isinstance(x, pd.DataFrame):
@@ -58,7 +58,7 @@ def __init__(
), "`len('feature_names)` must match `X.shape[1]`"

# calculate cat indices
-        cat_features = [] if not cat_features else cat_features
+        cat_features = cat_features if cat_features else []
assert set(cat_features).issubset(
feature_names
), "Categorical features must be a subset of feature names."
@@ -74,9 +74,9 @@ def __init__(
]

# pd 2 np
-        x = x.values if isinstance(x, pd.DataFrame) else x
-        y = y.values if isinstance(y, pd.Series) else y
-        weight = weight.values if isinstance(weight, pd.Series) else weight
+        x = x.to_numpy() if isinstance(x, pd.DataFrame) else x
+        y = y.to_numpy() if isinstance(y, pd.Series) else y
+        weight = weight.to_numpy() if isinstance(weight, pd.Series) else weight

assert (
x.shape[0] == y.shape[0]
@@ -112,24 +112,25 @@ def __init__(
self.weight = weight

     def __len__(self) -> int:
-        """
-        Length of dataset.
+        """Length of dataset.

-        Returns:
+        Returns
+        -------
             int: length
         """
return len(self.x_cont)

     def __getitem__(
         self, idx: int
     ) -> tuple[torch.Tensor | None, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """
-        Get sample for model.
+        """Get sample for model.

         Args:
+        ----
             idx (int): index of item.

-        Returns:
+        Returns
+        -------
             Tuple[torch.Tensor | None, torch.Tensor, torch.Tensor, torch.Tensor]:
                 x_cat (if present, otherwise None), x_cont, weight and y.
         """
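
The `.values` to `.to_numpy()` rewrite above comes from the pandas-vet (PD) family: `.values` may return an ndarray or an extension array depending on dtype, while `.to_numpy()` is explicit about the result. A short illustration with hypothetical data:

import pandas as pd

df = pd.DataFrame({"price": [1.0, 2.5], "volume": [100, 200]})
features = df.to_numpy()         # PD011-clean replacement for `df.values`
target = df["price"].to_numpy()  # the same accessor works on a Series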
9 changes: 4 additions & 5 deletions src/otc/data/fs.py
@@ -1,5 +1,4 @@
"""
Gives simple access to the google cloud storage bucket.
"""Gives simple access to the google cloud storage bucket.
Instance is only created once.
"""
@@ -13,10 +12,10 @@


 def _create_environment() -> gcsfs.GCSFileSystem:
-    """
-    Implement the global object pattern to connect only once to GCS.
+    """Implement the global object pattern to connect only once to GCS.

-    Returns:
+    Returns
+    -------
         gcsfs.GCSFileSystem: Instance of GCSFileSystem.
     """
gcloud_config = str(Path(settings.GCS_CRED_FILE).expanduser().resolve())
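
The docstring names the global object pattern: a module body runs exactly once per interpreter, on first import, so a module-level instance is shared by every importer. A minimal sketch of the idea (hypothetical anonymous credentials; the repository's actual wiring may differ):

import gcsfs

def _create_environment() -> gcsfs.GCSFileSystem:
    """Build the file system handle; the module body calls this once."""
    return gcsfs.GCSFileSystem(token="anon")  # hypothetical: anonymous access

fs = _create_environment()  # later imports reuse this instance instead of reconnecting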
7 changes: 3 additions & 4 deletions src/otc/data/make_dataset.py
@@ -1,5 +1,4 @@
"""
Script to pre-process the raw data set.
"""Script to pre-process the raw data set.
See `notebooks/` for further details.
"""
@@ -16,11 +15,11 @@
@click.argument("input_filepath", type=click.Path(exists=True))
@click.argument("output_filepath", type=click.Path())
 def main(input_filepath: click.Path, output_filepath: click.Path) -> None:
-    """
-    Run data processing scripts to turn raw data from (../raw) into\
+    """Run data processing scripts to turn raw data from (../raw) into\
     cleaned data ready to be analyzed (saved in ../processed).

     Args:
+    ----
         input_filepath (click.Path): input file
         output_filepath (click.Path): output file
     """
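
Because `main` is a click command with two path arguments, it can be exercised without a shell through click's built-in test runner (the file paths here are hypothetical):

from click.testing import CliRunner

from otc.data.make_dataset import main

runner = CliRunner()
result = runner.invoke(main, ["data/raw/trades.csv", "data/processed/trades.parquet"])
print(result.exit_code, result.output)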
3 changes: 1 addition & 2 deletions src/otc/features/__init__.py
@@ -1,5 +1,4 @@
"""
Support for features.
"""Support for features.
See `readme.md` for instructions on how to run.
"""
3 changes: 1 addition & 2 deletions src/otc/features/build_features.py
@@ -1,5 +1,4 @@
"""
Defines feature sets.
"""Defines feature sets.
See notebook/3.0b-feature-engineering.ipynb for details.
"""
3 changes: 1 addition & 2 deletions src/otc/metrics/__init__.py
@@ -1,5 +1,4 @@
"""
Support for metrics.
"""Support for metrics.
See `readme.md` for instructions on how to run.
"""