diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0eb0022..cbadf80 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.8] + python-version: ['3.10'] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -20,7 +20,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Upgrade pip - run: pip install -U pip setuptools wheel + run: pip install -U "pip<=24.1" setuptools wheel - name: Install lightfm run: python -m pip install --no-use-pep517 'lightfm<2' - name: Install package @@ -32,7 +32,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ['3.8', '3.9', '3.10', '3.11'] os: [ubuntu-20.04, macos-latest] steps: - uses: actions/checkout@v1 @@ -53,7 +53,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] os: [ubuntu-20.04, macos-latest] steps: - uses: actions/checkout@v1 @@ -70,7 +70,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ['3.8', '3.9', '3.10', '3.11'] os: [ubuntu-20.04, macos-latest] steps: - uses: actions/checkout@v1 @@ -91,7 +91,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ['3.8', '3.9', '3.10', '3.11'] os: [ubuntu-20.04] steps: - uses: actions/checkout@v1 diff --git a/README.md b/README.md index 662a3ed..fb5ba34 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Features include: ## Requirements -**MLBlocks** has been developed and tested on [Python 3.6, 3.7, 3.8, 3.9, and 3.10](https://www.python.org/downloads/) +**MLBlocks** has been developed and tested on [Python 3.8, 3.9, 3.10, 3.11, 3.12, 3.13](https://www.python.org/downloads/) ## Install with `pip` diff --git a/examples/tutorials/7. Tuning a Pipeline.ipynb b/examples/tutorials/7. Tuning a Pipeline.ipynb index 7a288a4..484e0b2 100644 --- a/examples/tutorials/7. Tuning a Pipeline.ipynb +++ b/examples/tutorials/7. Tuning a Pipeline.ipynb @@ -181,7 +181,7 @@ { "data": { "text/plain": [ - "0.8639171383183359" + "0.863978563379761" ] }, "execution_count": 6, @@ -210,7 +210,7 @@ { "data": { "text/plain": [ - "0.8686773872402614" + "0.868554574842" ] }, "execution_count": 7, @@ -242,7 +242,7 @@ "metadata": {}, "outputs": [], "source": [ - "from btb.tuning import Tunable\n", + "from baytune.tuning import Tunable\n", "\n", "tunable = Tunable.from_dict(tunable_hyperparameters)" ] @@ -265,7 +265,7 @@ "metadata": {}, "outputs": [], "source": [ - "from btb.tuning import GPTuner\n", + "from baytune.tuning import GPTuner\n", "\n", "tuner = GPTuner(tunable)" ] @@ -345,16 +345,15 @@ "output_type": "stream", "text": [ "scoring pipeline 1\n", + "New best found: 0.871994161365419\n", "scoring pipeline 2\n", + "New best found: 0.8723319756253888\n", "scoring pipeline 3\n", "scoring pipeline 4\n", - "New best found: 0.8642241881762839\n", "scoring pipeline 5\n", "scoring pipeline 6\n", "scoring pipeline 7\n", - "New best found: 0.8644390957265209\n", "scoring pipeline 8\n", - "New best found: 0.8679095503945804\n", "scoring pipeline 9\n", "scoring pipeline 10\n" ] @@ -395,13 +394,13 @@ "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 39,\n", - " ('sklearn.impute.SimpleImputer#1', 'strategy'): 'most_frequent',\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 70,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 6,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.07406443671152008,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9244108160038952,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" + " 'max_labels'): 60,\n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): 'mean',\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 190,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.13575511242790694,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.6326488945712287,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 8}" ] }, "execution_count": 13, @@ -443,7 +442,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -457,7 +456,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/examples/tutorials/8. Searching for the best pipeline with BTBSession.ipynb b/examples/tutorials/8. Searching for the best pipeline with BTBSession.ipynb index 80ad93f..a7e9d69 100644 --- a/examples/tutorials/8. Searching for the best pipeline with BTBSession.ipynb +++ b/examples/tutorials/8. Searching for the best pipeline with BTBSession.ipynb @@ -157,7 +157,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -286,7 +286,7 @@ "metadata": {}, "outputs": [], "source": [ - "from btb.session import BTBSession\n", + "from baytune.session import BTBSession\n", "\n", "session = BTBSession(tunables, cross_validate, verbose=True)" ] @@ -314,12 +314,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "342fe40f08024adcb5b60eea25f49d37", + "model_id": "00c20e4b982f42a1873c0d12f550ee4b", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))" + " 0%| | 0/5 [00:00\", line 11, in cross_validate\n", + " File \"/var/folders/by/d1f3gk0x14v54qggfxmjbn1c0000gn/T/ipykernel_19852/2674531477.py\", line 11, in cross_validate\n", " pipeline.fit(X_train, y_train)\n", - " File \"/home/xals/Projects/MIT/MLBlocks.clean/mlblocks/mlpipeline.py\", line 754, in fit\n", - " block, block_name, context, output_variables, outputs, debug_info)\n", - " File \"/home/xals/Projects/MIT/MLBlocks.clean/mlblocks/mlpipeline.py\", line 645, in _produce_block\n", + " File \"/Users/sarah/Documents/git-repos/MLBlocks/mlblocks/mlpipeline.py\", line 805, in fit\n", + " self._produce_block(\n", + " File \"/Users/sarah/Documents/git-repos/MLBlocks/mlblocks/mlpipeline.py\", line 679, in _produce_block\n", " block_outputs = block.produce(**produce_args)\n", - " File \"/home/xals/Projects/MIT/MLBlocks.clean/mlblocks/mlblock.py\", line 322, in produce\n", + " File \"/Users/sarah/Documents/git-repos/MLBlocks/mlblocks/mlblock.py\", line 331, in produce\n", " return getattr(self.instance, self.produce_method)(**produce_kwargs)\n", - " File \"/home/xals/.virtualenvs/MLBlocks.clean/lib/python3.6/site-packages/mlprimitives/custom/text.py\", line 111, in produce\n", + " File \"/Users/sarah/Documents/git-repos/MLPrimitives/mlprimitives/custom/text.py\", line 111, in produce\n", " texts = X[self.column]\n", - " File \"/home/xals/.virtualenvs/MLBlocks.clean/lib/python3.6/site-packages/pandas/core/frame.py\", line 2927, in __getitem__\n", + " File \"/opt/anaconda3/envs/py10/lib/python3.10/site-packages/pandas/core/frame.py\", line 3807, in __getitem__\n", " indexer = self.columns.get_loc(key)\n", - " File \"/home/xals/.virtualenvs/MLBlocks.clean/lib/python3.6/site-packages/pandas/core/indexes/base.py\", line 2659, in get_loc\n", - " return self._engine.get_loc(self._maybe_cast_indexer(key))\n", - " File \"pandas/_libs/index.pyx\", line 108, in pandas._libs.index.IndexEngine.get_loc\n", - " File \"pandas/_libs/index.pyx\", line 132, in pandas._libs.index.IndexEngine.get_loc\n", - " File \"pandas/_libs/hashtable_class_helper.pxi\", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item\n", - " File \"pandas/_libs/hashtable_class_helper.pxi\", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item\n", + " File \"/opt/anaconda3/envs/py10/lib/python3.10/site-packages/pandas/core/indexes/base.py\", line 3804, in get_loc\n", + " raise KeyError(key) from err\n", "KeyError: 'text'\n", - "2020-09-16 16:32:46,587 - WARNING - btb.session - Too many errors: 1. Removing tunable single_table.classification.text\n", - "2020-09-16 16:32:46,589 - INFO - btb.session - Creating Tunable instance from dict.\n", - "2020-09-16 16:32:46,589 - INFO - btb.session - Obtaining default configuration for single_table.classification.xgb\n", - "2020-09-16 16:32:52,100 - INFO - btb.session - Generating new proposal configuration for single_table.classification\n", - "2020-09-16 16:33:28,900 - INFO - btb.session - New optimal found: single_table.classification - 0.8728234138413778\n", - "2020-09-16 16:33:28,904 - INFO - btb.session - Generating new proposal configuration for single_table.classification.xgb\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" + "Too many errors: 1. Removing tunable single_table.classification.text\n" ] }, { "data": { "text/plain": [ - "{'id': '7e662f9b90f0e123939b7532ecc221c7',\n", - " 'name': 'single_table.classification',\n", + "{'id': '0ebe8af9c06a05f39821de36d6c9ffc2',\n", + " 'name': 'single_table.classification.xgb',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 63,\n", - " ('mlprimitives.custom.feature_extraction.StringVectorizer#1',\n", - " 'lowercase'): True,\n", - " ('mlprimitives.custom.feature_extraction.StringVectorizer#1',\n", - " 'binary'): True,\n", - " ('mlprimitives.custom.feature_extraction.StringVectorizer#1',\n", - " 'max_features'): 7315,\n", + " 'max_labels'): 52,\n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): 'median',\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 879,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23231879890615814,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.5474914147721585,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 3},\n", - " 'score': 0.8728234138413778}" + " ('xgboost.XGBClassifier#1', 'n_estimators'): 313,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7119589664956909,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.944854007471167,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.8641320270062784}" ] }, "execution_count": 11, @@ -489,23 +458,17 @@ { "data": { "text/plain": [ - "{'id': '7e662f9b90f0e123939b7532ecc221c7',\n", - " 'name': 'single_table.classification',\n", + "{'id': '0ebe8af9c06a05f39821de36d6c9ffc2',\n", + " 'name': 'single_table.classification.xgb',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 63,\n", - " ('mlprimitives.custom.feature_extraction.StringVectorizer#1',\n", - " 'lowercase'): True,\n", - " ('mlprimitives.custom.feature_extraction.StringVectorizer#1',\n", - " 'binary'): True,\n", - " ('mlprimitives.custom.feature_extraction.StringVectorizer#1',\n", - " 'max_features'): 7315,\n", + " 'max_labels'): 52,\n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): 'median',\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 879,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23231879890615814,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.5474914147721585,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 3},\n", - " 'score': 0.8728234138413778}" + " ('xgboost.XGBClassifier#1', 'n_estimators'): 313,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7119589664956909,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.944854007471167,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.8641320270062784}" ] }, "execution_count": 12, @@ -539,71 +502,31 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8dd5d4626f304c279b2b368a671b6cb7", + "model_id": "a0dbe69a0340455a937f7376f7723ec4", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))" + " 0%| | 0/10 [00:00=0.9,<1', - 'numpy>=1.17.1,<2', - 'psutil>=5,<6', + 'numpy>=1.17.1,<3', + 'psutil>=5,<7', ] mlprimitives_requires = [ - 'mlprimitives>=0.3.0,<0.4', - 'h5py<2.11.0,>=2.10.0', # <- tensorflow 2.3.2 conflict - 'matplotlib<3.2.2,>=2.2.2', # <- copulas 0.3.3 + 'mlprimitives>=0.4.0,<0.5', + 'h5py<4,>=2.10.0', # <- tensorflow 2.3.2 conflict + 'matplotlib<4,>=2.2.2', # <- copulas 0.3.3 'protobuf<4', # <- importlib ] examples_require = mlprimitives_requires + [ 'jupyter==1.0.0', - 'baytune>=0.4.0,<0.5', + 'baytune>=0.5.0,<0.6', + 'copulas<0.12', ] @@ -50,7 +51,7 @@ # general 'bumpversion>=0.5.3,<0.6', 'pip>=9.0.1', - 'watchdog>=0.8.3,<0.11', + 'watchdog>=0.8.3,<5', # docs 'm2r>=0.2.0,<0.3', @@ -62,6 +63,15 @@ 'Jinja2>=2,<3', # >=3 makes sphinx theme fail 'markupsafe<2.1.0', + # fails on Sphinx < v3.4 + 'alabaster<=0.7.12', + # fails on Sphins < v5.0 + 'sphinxcontrib-applehelp<1.0.8', + 'sphinxcontrib-devhelp<1.0.6', + 'sphinxcontrib-htmlhelp<2.0.5', + 'sphinxcontrib-serializinghtml<1.1.10', + 'sphinxcontrib-qthelp<1.0.7', + # style check 'flake8>=3.7.7,<4', 'isort>=4.3.4,<5', @@ -93,12 +103,12 @@ 'License :: OSI Approved :: MIT License', 'Natural Language :: English', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', ], description='Pipelines and primitives for machine learning and data science.', extras_require={ @@ -116,7 +126,7 @@ long_description_content_type='text/markdown', name='mlblocks', packages=find_packages(include=['mlblocks', 'mlblocks.*']), - python_requires='>=3.6,<3.12', + python_requires='>=3.8,<3.14', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, diff --git a/tox.ini b/tox.ini index 27e499e..cdaadc2 100644 --- a/tox.ini +++ b/tox.ini @@ -3,12 +3,12 @@ envlist = py3{6,7,8,9,10,11}, test-devel [travis] python = - 3.11: py11 - 3.10: py10 + 3.13: py313 + 3.12: py312 + 3.11: py311 + 3.10: py310 3.9: py39 3.8: py38, test-devel - 3.7: py37 - 3.6: py36 [testenv] passenv = CI TRAVIS TRAVIS_*