diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ea1f78ee8..a943e3d13 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -224,7 +224,7 @@ jobs: ./spin --help ./spin coverage --help ./spin test --help - ./spin coverage -k "slowtest" + ./spin coverage cp $PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages/coverage.xml ./coverage.xml - name: debug diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index 8b5c34f46..3990764bf 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -55,9 +55,9 @@ " beta = 10.0\n", " sigma = 10.0 / sigma_factor\n", " n_samples = 2200\n", - " n_estimators = 125\n", + " n_estimators = 500\n", " test_size = 0.1\n", - " n_repeats = 500\n", + " n_repeats = 1000\n", " metric = \"mse\"\n", "\n", " rng = np.random.default_rng(seed)\n", @@ -134,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], @@ -152,7 +152,7 @@ " max_features = \"sqrt\"\n", " test_size = 1.0 / 6\n", " metric = \"mse\"\n", - " n_repeats = 200\n", + " n_repeats = 500\n", " permute_per_tree = True\n", " sample_dataset_per_tree = True\n", "\n", @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true @@ -249,2814 +249,213 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = 
y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X1: 0.9940119760479041\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X1: 1.0\n" + "X6: 0.36726546906187624\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X6: 1.0\n" + "X2/7: 0.23952095808383234\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X2/7: 0.49101796407185627\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X1: 0.00998003992015968\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X1: 1.0\n" + "X6: 0.3532934131736527\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] - }, + } + ], + "source": [ + "pvalue_dict = defaultdict(list)\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "j_space = np.linspace(0.005, 2.25, 9)\n", + "\n", + "for sigma_factor in j_space:\n", + " for idx in range(5):\n", + " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "\n", + " elements_dict = linear_model_ancova(\n", + " sigma_factor,\n", + " new_seed,\n", + " permute_per_tree=False,\n", + " sample_dataset_per_tree=False,\n", + " )\n", + " for key, value in elements_dict.items():\n", + " pvalue_dict[key].append(value)\n", + " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "\n", + "df = pd.DataFrame(pvalue_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5f4eb53c-c82d-4770-836a-552b910dd736", + "metadata": {}, + "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "[autoreload of sktree.stats.forestht failed: Traceback (most recent call last):\n", + " File 
\"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 276, in check\n", + " superreload(m, reload, self.old_objects)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 500, in superreload\n", + " update_generic(old_obj, new_obj)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 349, in update_class\n", + " if update_generic(old_obj, new_obj):\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 309, in update_function\n", + " setattr(old, name, getattr(new, name))\n", + "ValueError: statistic() requires a code object with 1 free vars, not 0\n", + "]\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b470b569-4391-40e2-b7c4-a8439cc026c0", + "metadata": {}, + "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty 
slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " 
y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "pvalue_dict = defaultdict(list)\n", - "rng = np.random.default_rng(seed)\n", - "\n", - "j_space = np.linspace(0.005, 2.25, 9)\n", - "\n", - "for sigma_factor in j_space:\n", - " for idx in range(5):\n", - " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", "\n", - " elements_dict = linear_model_ancova(\n", - " sigma_factor,\n", - " new_seed,\n", - " permute_per_tree=True,\n", - " sample_dataset_per_tree=False,\n", - " )\n", - " for key, value in elements_dict.items():\n", - " pvalue_dict[key].append(value)\n", - " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", "\n", - "df = pd.DataFrame(pvalue_dict)" + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" ] }, { diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index 8408f4e03..fd4f84765 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -90,10 +90,10 @@ # computed as the proportion of samples in the null distribution that are less than the # observed test statistic. 
-n_estimators = 125 +n_estimators = 200 max_features = "sqrt" test_size = 0.2 -n_repeats = 500 +n_repeats = 1000 n_jobs = -1 est = FeatureImportanceForestClassifier( @@ -107,8 +107,8 @@ ), random_state=seed, test_size=test_size, - permute_per_tree=True, - sample_dataset_per_tree=True, + permute_per_tree=False, + sample_dataset_per_tree=False, ) # we test for the first feature set, which is important and thus should return a pvalue < 0.05 diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork index e2fee00aa..d9d1ea68f 160000 --- a/sktree/_lib/sklearn_fork +++ b/sktree/_lib/sklearn_fork @@ -1 +1 @@ -Subproject commit e2fee00aa461c21b8cfa59eb907d27972415c99b +Subproject commit d9d1ea68fde4f0bf90caff21dc26044ace3114ae diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 5ce124174..427de8251 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -23,6 +23,7 @@ POSTERIOR_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_coleman, + _non_nan_samples, train_tree, ) @@ -140,7 +141,7 @@ def _check_input(self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = if not isinstance(covariate_index, (list, tuple, np.ndarray)): raise RuntimeError("covariate_index must be an iterable of integer indices") else: - if not all(isinstance(idx, int) for idx in covariate_index): + if not all(isinstance(idx, (np.integer, int)) for idx in covariate_index): raise RuntimeError("Not all covariate_index are integer indices") if self._n_samples_ is not None and X.shape[0] != self._n_samples_: @@ -362,7 +363,7 @@ def test( # the posteriors and computing the test statistic on the resampled posteriors if self.sample_dataset_per_tree: metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[observe_samples, :], + y_test=y, y_pred_proba_normal=observe_posteriors, y_pred_proba_perm=permute_posteriors, metric=metric, @@ -375,10 +376,13 @@ def test( # there is only one train and test split, so we can just use that _, 
indices_test = self.train_test_samples_[0] y_test = y[indices_test, :] + y_pred_proba_normal = observe_posteriors[:, indices_test, :] + y_pred_proba_perm = permute_posteriors[:, indices_test, :] + metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y_test, - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, + y_pred_proba_normal=y_pred_proba_normal, + y_pred_proba_perm=y_pred_proba_perm, metric=metric, n_repeats=n_repeats, seed=self.random_state, @@ -588,19 +592,15 @@ def _statistic( samples = indices_test y_true_final = y_test - # determine if there are any nans in the final posterior array - temp_posterior_forest = np.nanmean(posterior_arr, axis=0) - nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] - - # Find the row indices with NaN values in any column - samples = nonnan_indices + # determine if there are any nans in the final posterior array, when + # averaged over the trees + samples = _non_nan_samples(posterior_arr) # Ignore all NaN values (samples not tested) - y_true_final = y[(nonnan_indices), :] - posterior_arr = posterior_arr[:, (nonnan_indices), :] + y_true_final = y[(samples), :] # Average all posteriors (n_samples_test, n_outputs) to compute the statistic - posterior_forest = np.nanmean(posterior_arr, axis=0) + posterior_forest = np.nanmean(posterior_arr[:, (samples), :], axis=0) stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) @@ -750,6 +750,7 @@ def _statistic( if predict_posteriors: # now initialize posterior array as (n_trees, n_samples_test, n_classes) + # XXX: currently assumes n_outputs_ == 1 posterior_arr = np.full( (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan ) @@ -818,19 +819,15 @@ def _statistic( f"AUC metric is not supported for {self._type_of_target_} targets." 
) - # determine if there are any nans in the final posterior array - temp_posterior_forest = np.nanmean(posterior_arr, axis=0) - nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] - - # Find the row indices with NaN values in any column - samples = nonnan_indices + # determine if there are any nans in the final posterior array, when + # averaged over the trees + samples = _non_nan_samples(posterior_arr) # Ignore all NaN values (samples not tested) - y_true_final = y[(nonnan_indices), :] - posterior_arr = posterior_arr[:, (nonnan_indices), :] + y_true_final = y[(samples), :] # Average all posteriors (n_samples_test, n_outputs) to compute the statistic - posterior_forest = np.nanmean(posterior_arr, axis=0) + posterior_forest = np.nanmean(posterior_arr[:, (samples), :], axis=0) stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) if covariate_index is None: @@ -846,51 +843,3 @@ def _statistic( return stat, posterior_arr, samples return stat - - def statistic( - self, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mi", - return_posteriors: bool = False, - check_input: bool = True, - **metric_kwargs, - ): - """Compute the test statistic. - - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "mi", which computes Mutual Information. - return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. - check_input : bool, optional - Whether or not to check the input, by default True. - **metric_kwargs : dict, optional - Additional keyword arguments to pass to the metric function. - - Returns - ------- - stat : float - The test statistic. 
- posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final), optional - If ``return_posteriors`` is True, then the posterior probabilities of the - samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` - if all samples are encountered in the test set of at least one tree in the - posterior computation. - samples : ArrayLike of shape (n_samples_final,), optional - The indices of the samples used in the final test. ``n_samples_final`` is - equal to ``n_samples`` if all samples are encountered in the test set of at - least one tree in the posterior computation. - """ - return super().statistic( - X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs - ) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 9b849ed22..20fc68e52 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -1,7 +1,11 @@ +import pickle +from pathlib import Path + import numpy as np import pytest from flaky import flaky from joblib import Parallel, delayed +from numpy.testing import assert_array_equal from scipy.special import expit from sklearn import datasets @@ -124,8 +128,8 @@ def test_featureimportance_forest_errors(): "sample_dataset_per_tree": True, }, 300, # n_samples - 500, # n_repeats - 0.1, # test_size + 1000, # n_repeats + 0.2, # test_size ], ], ) @@ -205,12 +209,12 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) n_jobs=-1, ), "random_state": seed, - "permute_per_tree": True, - "sample_dataset_per_tree": True, + "permute_per_tree": False, + "sample_dataset_per_tree": False, }, - 600, - 200, - 1.0 / 6, + 600, # n_samples + 1000, # n_repeats + 1.0 / 6, # test_size ], ], ) @@ -266,7 +270,6 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, @flaky(max_runs=2) -@pytest.mark.slowtest @pytest.mark.parametrize("criterion", ["gini", 
"entropy"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform"]) @pytest.mark.parametrize( @@ -408,7 +411,54 @@ def run_forest(covariate_index=None): pvalue = clf.test(X, y, covariate_index=[covariate_index], metric="mi") return pvalue - out = Parallel(n_jobs=1, backend=backend)( + out = Parallel(n_jobs=-1, backend=backend)( delayed(run_forest)(covariate_index) for covariate_index in range(n_features) ) assert len(out) == n_features + + +def test_pickle(tmpdir): + """Test that pickling works and preserves fitted attributes.""" + n_samples = 100 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + n_repeats = 1000 + + clf = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.2 + ), + test_size=0.5, + ) + stat, pvalue = clf.test(X, y, covariate_index=[1], metric="mi", n_repeats=n_repeats) + + with open(Path(tmpdir) / "clf.pkl", "wb") as fpath: + pickle.dump(clf, fpath) + + with open(Path(tmpdir) / "clf.pkl", "rb") as fpath: + clf_pickle = pickle.load(fpath) + + # recompute pvalue manually and compare + pickle_pvalue = ( + 1.0 + (clf_pickle.null_dist_ <= (clf_pickle.permute_stat_ - clf_pickle.observe_stat_)).sum() + ) / (1.0 + n_repeats) + assert pvalue == pickle_pvalue + assert clf_pickle.permute_stat_ - clf_pickle.observe_stat_ == stat + + attr_list = [ + "test_size", + "observe_samples_", + "y_true_final_", + "observe_posteriors_", + "observe_stat_", + "_is_fitted", + "permute_samples_", + "permute_posteriors_", + "permute_stat_", + "n_samples_test_", + "_n_samples_", + "_metric", + ] + for attr in attr_list: + assert_array_equal(getattr(clf, attr), getattr(clf_pickle, attr)) diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 30ce83ff8..d2f36c1e4 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -16,12 +16,12 @@ def _mutual_information(y_true: ArrayLike, 
y_pred_proba: ArrayLike) -> float: - """Compute estimate of mutual information. + """Compute estimate of mutual information for supervised classification setting. Parameters ---------- y_true : ArrayLike of shape (n_samples,) - _description_ + The true labels. y_pred_proba : ArrayLike of shape (n_samples, n_outputs) Posterior probabilities. @@ -41,21 +41,69 @@ def _mutual_information(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: return H_Y - H_YX +def _cond_entropy(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: + """Compute estimate of entropy for supervised classification setting. + + H(Y | X) + + Parameters + ---------- + y_true : ArrayLike of shape (n_samples,) + The true labels. Not used in computation of the entropy. + y_pred_proba : ArrayLike of shape (n_samples, n_outputs) + Posterior probabilities. + + Returns + ------- + float : + The estimated conditional entropy H(Y | X). + """ + if y_true.squeeze().ndim != 1: + raise ValueError(f"y_true must be 1d, not {y_true.shape}") + + # entropy averaged over n_samples + H_YX = np.mean(entropy(y_pred_proba, base=np.exp(1), axis=1)) + return H_YX + + METRIC_FUNCTIONS = { "mse": mean_squared_error, "mae": mean_absolute_error, "balanced_accuracy": balanced_accuracy_score, "auc": roc_auc_score, "mi": _mutual_information, + "cond_entropy": _cond_entropy, } -POSTERIOR_FUNCTIONS = ("mi", "auc") +POSTERIOR_FUNCTIONS = ("mi", "auc", "cond_entropy") POSITIVE_METRICS = ("mi", "auc", "balanced_accuracy") REGRESSOR_METRICS = ("mse", "mae") +def _non_nan_samples(posterior_arr: ArrayLike) -> ArrayLike: + """Determine which samples are not nan in the posterior tree array. + + Parameters + ---------- + posterior_arr : ArrayLike of shape (n_trees, n_samples, n_outputs) + The 3D posterior array from the forest. + + Returns + ------- + nonnan_indices : ArrayLike of shape (n_nonnan_samples,) + The indices of the samples that are not nan in the posterior array + along axis=1. 
+ """ + # Find the row indices with NaN values along the specified axis + nan_indices = np.isnan(posterior_arr).any(axis=2).all(axis=0) + + # Invert the boolean mask to get indices without NaN values + nonnan_indices = np.where(~nan_indices)[0] + return nonnan_indices + + def train_tree( tree: DecisionTreeClassifier, X: ArrayLike, @@ -218,16 +266,30 @@ def _compute_null_distribution_coleman( rng.shuffle(y_pred_ind_arr) # get random half of the posteriors from two sets of trees - first_forest_inds = y_pred_ind_arr[:n_samples_test] - second_forest_inds = y_pred_ind_arr[:n_samples_test] + first_forest_inds = y_pred_ind_arr[: n_estimators // 2] + second_forest_inds = y_pred_ind_arr[n_estimators // 2 :] + + # get random half of the posteriors as one forest + first_forest_pred = all_y_pred[first_forest_inds, ...] + second_forest_pred = all_y_pred[second_forest_inds, ...] + + # determine if there are any nans in the final posterior array, when + # averaged over the trees + first_forest_samples = _non_nan_samples(first_forest_pred) + second_forest_samples = _non_nan_samples(second_forest_pred) + + # todo: is this step necessary? 
+ non_nan_samples = np.intersect1d( + first_forest_samples, second_forest_samples, assume_unique=True + ) - # get random half of the posteriors - y_pred_first_half = np.nanmean(all_y_pred[first_forest_inds], axis=0) - y_pred_second_half = np.nanmean(all_y_pred[second_forest_inds], axis=0) + # now average the posteriors over the trees for the non-nan samples + y_pred_first_half = np.nanmean(first_forest_pred[:, non_nan_samples, :], axis=0) + y_pred_second_half = np.nanmean(second_forest_pred[:, non_nan_samples, :], axis=0) # compute two instances of the metric from the sampled trees - first_half_metric = metric_func(y_test, y_pred_first_half) - second_half_metric = metric_func(y_test, y_pred_second_half) + first_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_first_half) + second_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_second_half) metric_star[idx] = first_half_metric metric_star_pi[idx] = second_half_metric