Skip to content

Commit

Permalink
fix data loading
Browse files Browse the repository at this point in the history
  • Loading branch information
sarahmish committed Sep 25, 2023
1 parent df295cf commit 68538ac
Show file tree
Hide file tree
Showing 11 changed files with 81,562 additions and 47 deletions.
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,15 @@ pipeline which combines primitives from [MLPrimitives](https://github.com/MLBaza

```python3
from mlblocks import MLPipeline
- from mlprimitives.datasets import load_dataset
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import accuracy_score

- dataset = load_dataset('census')
- X_train, X_test, y_train, y_test = dataset.get_splits(1)
+ import pandas as pd
+
+ dataset = pd.read_csv('http://mlblocks.s3.amazonaws.com/census.csv')
+ label = dataset.pop('label')
+
+ X_train, X_test, y_train, y_test = train_test_split(dataset, label, stratify=label)

primitives = [
'mlprimitives.custom.preprocessing.ClassEncoder',
Expand All @@ -104,7 +109,7 @@ pipeline = MLPipeline(primitives)
pipeline.fit(X_train, y_train)
predictions = pipeline.predict(X_test)

- dataset.score(y_test, predictions)
+ accuracy_score(y_test, predictions)
```

# What's Next?
Expand Down
23 changes: 15 additions & 8 deletions examples/tutorials/1. Using and MLPipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
"metadata": {},
"outputs": [],
"source": [
"from mlprimitives.datasets import load_dataset\n",
"from utils import load_census\n",
"\n",
"dataset = load_dataset('census')"
"dataset = load_census()"
]
},
{
Expand Down Expand Up @@ -528,7 +528,16 @@
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sarah/anaconda3/envs/mlp/lib/python3.8/site-packages/sklearn/impute/_base.py:382: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n",
" warnings.warn(\n"
]
}
],
"source": [
"pipeline.fit(X_train, y_train)"
]
Expand All @@ -546,9 +555,7 @@
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"predictions = pipeline.predict(X_test)"
Expand Down Expand Up @@ -611,7 +618,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -625,7 +632,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down
24 changes: 20 additions & 4 deletions examples/tutorials/3. Setting MLPipeline Hyperparameters.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
"metadata": {},
"outputs": [],
"source": [
"from mlprimitives.datasets import load_dataset\n",
"from utils import load_census\n",
"\n",
"dataset = load_dataset('census')\n",
"dataset = load_census()\n",
"X_train, X_test, y_train, y_test = dataset.get_splits(1)"
]
},
Expand Down Expand Up @@ -268,6 +268,14 @@
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sarah/anaconda3/envs/mlp/lib/python3.8/site-packages/sklearn/impute/_base.py:382: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
Expand Down Expand Up @@ -394,6 +402,14 @@
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sarah/anaconda3/envs/mlp/lib/python3.8/site-packages/sklearn/impute/_base.py:382: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
Expand All @@ -415,7 +431,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -429,7 +445,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down
19 changes: 14 additions & 5 deletions examples/tutorials/4. Saving and Loading a Pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
"metadata": {},
"outputs": [],
"source": [
"from mlprimitives.datasets import load_dataset\n",
"from utils import load_census\n",
"\n",
"dataset = load_dataset('census')"
"dataset = load_census()"
]
},
{
Expand Down Expand Up @@ -71,7 +71,16 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sarah/anaconda3/envs/mlp/lib/python3.8/site-packages/sklearn/impute/_base.py:382: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n",
" warnings.warn(\n"
]
}
],
"source": [
"pipeline.fit(X_train, y_train)"
]
Expand Down Expand Up @@ -166,7 +175,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -180,7 +189,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
"metadata": {},
"outputs": [],
"source": [
"from mlprimitives.datasets import load_dataset\n",
"from utils import load_census\n",
"\n",
"dataset = load_dataset('census')"
"dataset = load_census()"
]
},
{
Expand Down Expand Up @@ -430,7 +430,16 @@
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sarah/anaconda3/envs/mlp/lib/python3.8/site-packages/sklearn/impute/_base.py:382: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n",
" warnings.warn(\n"
]
}
],
"source": [
"fit_context = pipeline.fit(start_=1, output_=2, **fit_context)"
]
Expand Down Expand Up @@ -690,7 +699,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -704,7 +713,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down
30 changes: 24 additions & 6 deletions examples/tutorials/6. Flexible outputs specification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
"metadata": {},
"outputs": [],
"source": [
"from mlprimitives.datasets import load_dataset\n",
"from utils import load_census\n",
"\n",
"dataset = load_dataset('census')"
"dataset = load_census()"
]
},
{
Expand Down Expand Up @@ -420,7 +420,16 @@
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sarah/anaconda3/envs/mlp/lib/python3.8/site-packages/sklearn/impute/_base.py:382: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n",
" warnings.warn(\n"
]
}
],
"source": [
"output_spec = [\n",
" 'sklearn.impute.SimpleImputer#1.X',\n",
Expand All @@ -441,7 +450,16 @@
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/sarah/anaconda3/envs/mlp/lib/python3.8/site-packages/sklearn/impute/_base.py:382: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n",
" warnings.warn(\n"
]
}
],
"source": [
"output_spec = [\n",
" 'mlprimitives.custom.feature_extraction.CategoricalEncoder#1.X',\n",
Expand Down Expand Up @@ -495,7 +513,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -509,7 +527,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions examples/tutorials/7. Tuning a Pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
"metadata": {},
"outputs": [],
"source": [
"from mlprimitives.datasets import load_dataset\n",
"from utils import load_census\n",
"\n",
"dataset = load_dataset('census')"
"dataset = load_census()"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
"metadata": {},
"outputs": [],
"source": [
"from mlprimitives.datasets import load_dataset\n",
"from utils import load_census\n",
"\n",
"dataset = load_dataset('census')"
"dataset = load_census()"
]
},
{
Expand Down Expand Up @@ -309,9 +309,7 @@
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -536,9 +534,7 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -707,9 +703,7 @@
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"data": {
Expand Down Expand Up @@ -772,7 +766,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -786,7 +780,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.16"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 68538ac

Please sign in to comment.