diff --git a/.readthedocs.yml b/.readthedocs.yml index ca413dc3..2bfc8acb 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -20,4 +20,4 @@ python: build: os: "ubuntu-22.04" tools: - python: "3.8" + python: "3.11" diff --git a/README.rst b/README.rst index c3dff66f..e87ab667 100644 --- a/README.rst +++ b/README.rst @@ -49,7 +49,7 @@ You can install using either ``pip`` or ``conda``. See details `here `__ - `gridmap `__ (only required if you plan to run things in parallel on a DRMAA-compatible cluster) diff --git a/conda-recipe/README.md b/conda-recipe/README.md index 92e286b4..6a8eefd9 100644 --- a/conda-recipe/README.md +++ b/conda-recipe/README.md @@ -6,5 +6,5 @@ 3. Upload the package file to anaconda.org using `anaconda upload --user ets `. -4. Test the package: `conda create -n foobar -c ets -c conda-forge python=3.9 skll`. This should _always_ install the latest package from the ``ets`` conda channel. +4. Test the package: `conda create -n foobar -c https://conda.anaconda.org/ets -c conda-forge python=3.11 skll`. This should _always_ install the latest package from the ``ets`` conda channel. Note that we are specifying the ``ets`` channel first since SKLL is now also in conda-forge but runs a version behind until we do the actual release on GitHub. 
diff --git a/conda-recipe/skll/meta.yaml b/conda-recipe/skll/meta.yaml index 8347bb72..e3d0453b 100644 --- a/conda-recipe/skll/meta.yaml +++ b/conda-recipe/skll/meta.yaml @@ -1,6 +1,6 @@ package: name: skll - version: 4.0.1 + version: 5.0.0 source: path: ../../../skll @@ -24,21 +24,22 @@ build: requirements: host: - - python >=3.8 + - python >=3.10 - pip - setuptools run: - - python >=3.8 + - python >=3.10 - beautifulsoup4 - joblib - - numpy + - numpy<2 - pandas - ruamel.yaml - - scikit-learn >=1.3.0,<1.4.0 + - scikit-learn >=1.4.0,<1.5.0 - scipy - seaborn - tabulate - typing_extensions + - wandb test: # Python imports diff --git a/doc/conf.py b/doc/conf.py index 4ee20a02..e185da29 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -33,7 +33,7 @@ # General information about the project. project = "SciKit-Learn Laboratory" -copyright = "2012-2023, Educational Testing Service" +copyright = "2012-2024, Educational Testing Service" # The short X.Y version. version = __version__ @@ -70,7 +70,7 @@ # The paper size ('letterpaper' or 'a4paper'). "papersize": "letterpaper", # The font size ('10pt', '11pt' or '12pt'). - "pointsize": "12pt" + "pointsize": "12pt", # Additional stuff for the LaTeX preamble. #'preamble': '', } @@ -112,4 +112,4 @@ epub_title = "SciKit-Learn Laboratory" epub_author = "Educational Testing Service" epub_publisher = "Educational Testing Service" -epub_copyright = "2012-2023, Educational Testing Service" +epub_copyright = "2012-2024, Educational Testing Service" diff --git a/doc/contributing.rst b/doc/contributing.rst index 2d89d785..fc981cee 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -78,11 +78,11 @@ There are three main entry points into the SKLL codebase: `run_experiment `__ script. When you run the command ``run_experiment ``, the following happens (at a high level): - - the configuration file is handed off to the `run_configuration() `__ function in ``experiments.py``. 
+ - the configuration file is handed off to the `run_configuration() `__ function in ``experiments.py``. - a `SKLLConfigParser `__ object is instantiated from ``config.py`` that parses all of the relevant fields out of the given configuration file. - - the configuration fields are then passed to the `_classify_featureset() `__ function in ``experiments.py`` which instantiates the learners (using code from ``learner.py``), the featuresets (using code from ``reader.py`` & ``featureset.py``), and runs the experiments, collects the results, and writes them out to disk. + - the configuration fields are then passed to the `_classify_featureset() `__ function in ``experiments.py`` which instantiates the learners (using code from ``learner.py``), the featuresets (using code from ``reader.py`` & ``featureset.py``), and runs the experiments, collects the results, and writes them out to disk. 2. **SKLL API**. Another way to interact with SKLL is via the SKLL API directly in your Python code rather than using configuration files. For example, you could use the `Learner.from_file() `__ or `VotingLearner.from_file() `__ methods to load saved models of those types from disk and make predictions on new data. The documentation for the SKLL API can be found `here `__. diff --git a/doc/run_experiment.rst b/doc/run_experiment.rst index 0b098cf2..16718de6 100644 --- a/doc/run_experiment.rst +++ b/doc/run_experiment.rst @@ -595,11 +595,27 @@ fixed_parameters *(Optional)* """"""""""""""""""""""""""""" List of dictionaries containing parameters you want to have fixed for each -learner in :ref:`learners` list. Any empty ones will be ignored -(and the defaults will be used). If :ref:`grid_search` is ``True``, +learner in the :ref:`learners` list. Empty dictionaries will be ignored +and the defaults will be used for these learners. If :ref:`grid_search` is ``True``, there is a potential for conflict with specified/default parameter grids and fixed parameters. +.. 
note:: + Tuples are not supported in the config file, and will lead to parsing errors. + Make sure to replace tuples with lists when specifying fixed parameters. + As an example, consider the following parameter that's usually defined as a tuple in scikit-learn: + + .. code-block:: python + + {'hidden_layer_sizes': (28, 28)} + + To specify it in ``fixed_parameters``, use a list instead: + + .. code-block:: python + + {'hidden_layer_sizes': [28, 28]} + + The default fixed parameters (beyond those that ``scikit-learn`` sets) are: AdaBoostClassifier and AdaBoostRegressor @@ -1190,11 +1206,24 @@ SVR {'C': [0.01, 0.1, 1.0, 10.0, 100.0]} .. note:: - Note that learners not listed here do not have any default - parameter grids in SKLL either because there are no - hyper-parameters to tune or decisions about which parameters - to tune (and how) depend on the data being used for the - experiment and are best left up to the user. + 1. Learners not listed here do not have any default + parameter grids in SKLL either because there are no + hyper-parameters to tune or decisions about which parameters + to tune (and how) depend on the data being used for the + experiment and are best left up to the user. + 2. Tuples are not supported in the config file, and will lead to parsing errors. + Make sure to replace tuples with lists when specifying parameter grids. + As an example, consider the following parameter that's usually defined as a tuple in scikit-learn: + + .. code-block:: python + + {'hidden_layer_sizes': (28, 28)} + + To specify it in ``param_grids``, use a list instead: + + .. code-block:: python + + {'hidden_layer_sizes': [28, 28]} .. _pos_label: diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 0f2a8d5d..f92a3dc5 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -34,7 +34,7 @@ this is in a virtual environment. 
For this tutorial, we will use `conda `__ for creating our virtual environment as follows:: - conda create -n skllenv -c conda-forge -c ets python=3.9 skll + conda create -n skllenv -c conda-forge -c ets python=3.11 skll This will create a new virtual environment named ``skllenv`` with the latest release of SKLL which you can then activate by running diff --git a/pyproject.toml b/pyproject.toml index 2307d28c..314436c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,12 @@ line-length = 100 target-version = ['py38'] [tool.ruff] -select = ["D", "E", "F", "I"] -ignore = ["D212"] +extend-exclude = ["setup.py"] +lint.select = ["D", "E", "F", "I"] +lint.ignore = ["D212"] line-length = 100 target-version = "py38" fix = true + +[tool.mypy] +exclude = '^setup\.py$' diff --git a/requirements.txt b/requirements.txt index ddb4d101..75dbdb79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ beautifulsoup4 coverage joblib nose2 -numpy +numpy<2 pandas pre-commit ruamel.yaml diff --git a/setup.py b/setup.py index 3f8d48f7..a2e8272d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def requirements(): setup( name="skll", - version=__version__, # noqa: F821 + version=__version__, # type: ignore # noqa: F821 description=( "SciKit-Learn Laboratory makes it easier to run machine " "learning experiments with scikit-learn." @@ -59,10 +59,9 @@ def requirements(): "Operating System :: Unix", "Operating System :: MacOS", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], zip_safe=False, ) diff --git a/skll/version.py b/skll/version.py index 1fa75d8f..66420e59 100644 --- a/skll/version.py +++ b/skll/version.py @@ -10,5 +10,5 @@ :organization: ETS """ -__version__ = "4.0.1" +__version__ = "5.0.0" VERSION = tuple(int(x) for x in __version__.split("."))