Skip to content

Commit

Permalink
cleaning up the notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
mtjrider committed Mar 18, 2019
1 parent 084756b commit 6fba22f
Showing 1 changed file with 24 additions and 89 deletions.
113 changes: 24 additions & 89 deletions mortgage/E2E.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,9 @@
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/nfs/majones/conda/envs/rapids/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n",
" data = yaml.load(f.read()) or {}\n",
"/home/nfs/majones/conda/envs/rapids/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n",
" defaults = yaml.load(f)\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import dask_xgboost as dxgb_gpu\n",
Expand All @@ -85,41 +74,9 @@
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>tcp://10.33.227.165:37501\n",
" <li><b>Dashboard: </b><a href='http://10.33.227.165:8787/status' target='_blank'>http://10.33.227.165:8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>8</li>\n",
" <li><b>Cores: </b>8</li>\n",
" <li><b>Memory: </b>1.08 TB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='tcp://10.33.227.165:37501' processes=8 cores=8>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"\n",
Expand All @@ -142,22 +99,22 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# to download data for this notebook, visit https://rapidsai.github.io/demos/datasets/mortgage-data and update the following paths accordingly\n",
"acq_data_path = \"/datasets/mortgage/mortgage/acq\"\n",
"perf_data_path = \"/datasets/mortgage/mortgage/perf_1000M\"\n",
"col_names_path = \"/datasets/mortgage/mortgage/names.csv\"\n",
"acq_data_path = \"/path/to/acq\"\n",
"perf_data_path = \"/path/to/perf\"\n",
"col_names_path = \"/path/to/names.csv\"\n",
"start_year = 2000\n",
"end_year = 2016 # end_year is inclusive\n",
"part_count = 16 # the number of data files to train against"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -179,27 +136,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'tcp://10.33.227.165:33247': True,\n",
" 'tcp://10.33.227.165:33773': True,\n",
" 'tcp://10.33.227.165:37184': True,\n",
" 'tcp://10.33.227.165:38419': True,\n",
" 'tcp://10.33.227.165:42962': True,\n",
" 'tcp://10.33.227.165:43195': True,\n",
" 'tcp://10.33.227.165:45119': True,\n",
" 'tcp://10.33.227.165:45562': True}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.run(initialize_rmm_pool)"
]
Expand All @@ -213,7 +152,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -392,7 +331,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -409,7 +348,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -438,7 +377,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -454,7 +393,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -491,7 +430,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -520,7 +459,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -534,7 +473,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -555,7 +494,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -567,7 +506,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -587,11 +526,7 @@
" df['delinquency_12'] = df['delinquency_12'] > 0\n",
" df['delinquency_12'] = df['delinquency_12'].fillna(False).astype('int32')\n",
" for column in df.columns:\n",
" if column != 'delinquency_12': df[column] = df[column].astype('float32')\n",
" else: df[column] = df[column].astype('int32')\n",
" df[column] = df[column].fillna(np.dtype(str(df[column].dtype)).type(-1))\n",
" print('DF DTYPES')\n",
" print(df.dtypes)\n",
" return df.to_arrow(preserve_index=False)"
]
},
Expand Down

0 comments on commit 6fba22f

Please sign in to comment.