🎨 Auto lint jupyter notebooks with black
Changes single quotes to double quotes, enforces a maximum line length of 88 for code, strips trailing whitespace, etc. See https://black.readthedocs.io/en/stable/the_black_code_style.html

Changes were made by 1) adding "formats": "ipynb,py:percent" to the Jupyter notebook's metadata, 2) saving the .py files using python-black v0.6.0 inside the Atom editor (https://github.com/mikehoyio/atom-python-black), which automatically triggers the linting of both the .py and .ipynb scripts, and 3) using git add --patch *.ipynb to stage only the changed code and not the cell outputs (which disappear...), with some fine tuning in GitKraken.
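
For reference, step 1 can also be reproduced outside of Jupyter by editing the notebook JSON directly. The sketch below is a minimal, hypothetical helper (stdlib only, function name and in-place rewrite are my own; the actual commit added the metadata through the Jupyter/Atom workflow described above). The "jupytext"/"formats" keys it writes match the notebook metadata visible near the bottom of the diff.

    import json

    def add_jupytext_pairing(notebook_path: str = "data_prep.ipynb") -> None:
        # Pair the notebook with a py:percent script by writing the jupytext
        # "formats" key into the notebook-level metadata (step 1 above).
        with open(notebook_path) as f:
            nb = json.load(f)
        metadata = nb.setdefault("metadata", {})
        metadata.setdefault("jupytext", {})["formats"] = "ipynb,py:percent"
        with open(notebook_path, "w") as f:
            json.dump(nb, f, indent=1)
            f.write("\n")

    add_jupytext_pairing()

Once the pairing is in place, saving the .py representation after python-black has reformatted it (step 2) lets the black-formatted source flow back into the .ipynb, which is what produces the quote and line-length changes in the diff below; git add --patch then keeps the unchanged cell outputs out of the commit (step 3).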
weiji14 committed Nov 26, 2018
1 parent: 20fcb35 · commit: 8487e88
Showing 4 changed files with 808 additions and 504 deletions.
162 changes: 103 additions & 59 deletions data_prep.ipynb
@@ -60,12 +60,12 @@
"import skimage.util.shape\n",
"import xarray as xr\n",
"\n",
"print('Python :', sys.version.split('\\n')[0])\n",
"print('GMT :', gmt.__version__)\n",
"print('Numpy :', np.__version__)\n",
"print('Rasterio :', rasterio.__version__)\n",
"print('Scikit-image :', skimage.__version__)\n",
"print('Xarray :', xr.__version__)"
"print(\"Python :\", sys.version.split(\"\\n\")[0])\n",
"print(\"GMT :\", gmt.__version__)\n",
"print(\"Numpy :\", np.__version__)\n",
"print(\"Rasterio :\", rasterio.__version__)\n",
"print(\"Scikit-image :\", skimage.__version__)\n",
"print(\"Xarray :\", xr.__version__)"
]
},
{
@@ -78,23 +78,25 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def download_to_path(path:str, url:str):\n",
"def download_to_path(path: str, url: str):\n",
" r\"\"\"\n",
" Download from a url to a path\n",
" \n",
"\n",
" >>> download_to_path(path=\"highres/Data_20171204_02.csv\",\n",
" ... url=\"https://data.cresis.ku.edu/data/rds/2017_Antarctica_Basler/csv_good/Data_20171204_02.csv\")\n",
" <Response [200]>\n",
" >>> open(\"highres/Data_20171204_02.csv\").readlines()\n",
" ['LAT,LON,UTCTIMESOD,THICK,ELEVATION,FRAME,SURFACE,BOTTOM,QUALITY\\n']\n",
" >>> os.remove(path=\"highres/Data_20171204_02.csv\")\n",
" \"\"\"\n",
" #if not os.path.exists(path=path):\n",
" # if not os.path.exists(path=path):\n",
" r = requests.get(url=url, stream=True)\n",
" with open(file=path, mode='wb') as fd:\n",
" with open(file=path, mode=\"wb\") as fd:\n",
" for chunk in r.iter_content(chunk_size=1024):\n",
" fd.write(chunk)\n",
" return r"
@@ -103,13 +105,15 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def check_sha256(path: str):\n",
" \"\"\"\n",
" Returns SHA256 checksum of a file\n",
" \n",
"\n",
" >>> download_to_path(path=\"highres/Data_20171204_02.csv\",\n",
" ... url=\"https://data.cresis.ku.edu/data/rds/2017_Antarctica_Basler/csv_good/Data_20171204_02.csv\")\n",
" <Response [200]>\n",
@@ -133,7 +137,9 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def parse_datalist(\n",
@@ -184,33 +190,41 @@
"source": [
"# Code to autogenerate README.md files in highres/lowres/misc folders from data_list.yml\n",
"columns = [\"Filename\", \"Location\", \"Resolution\", \"Literature Citation\", \"Data Citation\"]\n",
"for folder, md_header in [(\"lowres\", \"Low Resolution\"),\n",
" (\"highres\", \"High Resolution\"),\n",
" (\"misc\", \"Miscellaneous\")]:\n",
" assert(folder in pd.unique(dataframe[\"folder\"]))\n",
"for folder, md_header in [\n",
" (\"lowres\", \"Low Resolution\"),\n",
" (\"highres\", \"High Resolution\"),\n",
" (\"misc\", \"Miscellaneous\"),\n",
"]:\n",
" assert folder in pd.unique(dataframe[\"folder\"])\n",
" md_name = f\"{folder}/README.md\"\n",
" \n",
"\n",
" with open(file=md_name, mode=\"w\") as md_file:\n",
" md_file.write(f\"# {md_header} Antarctic datasets\\n\\n\")\n",
" md_file.write(\"Note: This file was automatically generated from \")\n",
" md_file.write(\"[data_list.yml](/data_list.yml) using \")\n",
" md_file.write(\"[data_prep.ipynb](/data_prep.ipynb)\\n\\n\")\n",
" \n",
"\n",
" md_table = pd.DataFrame(columns=columns)\n",
" md_table.loc[0] = ['---','---','---','---','---']\n",
" \n",
" md_table.loc[0] = [\"---\", \"---\", \"---\", \"---\", \"---\"]\n",
"\n",
" keydf = dataframe.groupby(\"citekey\").aggregate(lambda x: set(x).pop())\n",
" for row in keydf.loc[keydf[\"folder\"] == folder].itertuples():\n",
" filecount = len(dataframe[dataframe[\"citekey\"] == row.Index])\n",
" extension = os.path.splitext(row.filename)[-1]\n",
" row_dict = {\"Filename\": row.filename if filecount == 1 else f\"{filecount} *{extension} files\",\n",
" \"Location\": row.location,\n",
" \"Resolution\": row.resolution,\n",
" \"Literature Citation\": f\"[{row.Index}]({row.doi_literature})\",\n",
" \"Data Citation\": f\"[DOI]({row.doi_dataset})\" if row.doi_dataset!='nan' else None}\n",
" row_dict = {\n",
" \"Filename\": row.filename\n",
" if filecount == 1\n",
" else f\"{filecount} *{extension} files\",\n",
" \"Location\": row.location,\n",
" \"Resolution\": row.resolution,\n",
" \"Literature Citation\": f\"[{row.Index}]({row.doi_literature})\",\n",
" \"Data Citation\": f\"[DOI]({row.doi_dataset})\"\n",
" if row.doi_dataset != \"nan\"\n",
" else None,\n",
" }\n",
" md_table = md_table.append(other=row_dict, ignore_index=True)\n",
" \n",
" md_table.to_csv(path_or_buf=md_name, mode='a', sep=\"|\", index=False)"
"\n",
" md_table.to_csv(path_or_buf=md_name, mode=\"a\", sep=\"|\", index=False)"
]
},
{
@@ -258,10 +272,10 @@
],
"source": [
"for dataset in dataframe.loc[dataframe[\"folder\"] == \"lowres\"].itertuples():\n",
" path = f\"{dataset.folder}/{dataset.filename}\" #path to download the file to\n",
" path = f\"{dataset.folder}/{dataset.filename}\" # path to download the file to\n",
" if not os.path.exists(path=path):\n",
" download_to_path(path=path, url=dataset.url)\n",
" assert(check_sha256(path=path) == dataset.sha256)\n",
" assert check_sha256(path=path) == dataset.sha256\n",
"pprint_table(dataframe, \"lowres\")"
]
},
@@ -285,7 +299,7 @@
],
"source": [
"with rasterio.open(\"lowres/bedmap2_bed.tif\") as raster_source:\n",
" rasterio.plot.show(source=raster_source, cmap='BrBG_r')"
" rasterio.plot.show(source=raster_source, cmap=\"BrBG_r\")"
]
},
{
@@ -339,10 +353,10 @@
],
"source": [
"for dataset in dataframe.loc[dataframe[\"folder\"] == \"misc\"].itertuples():\n",
" path = f\"{dataset.folder}/{dataset.filename}\" #path to download the file to\n",
" path = f\"{dataset.folder}/{dataset.filename}\" # path to download the file to\n",
" if not os.path.exists(path=path):\n",
" download_to_path(path=path, url=dataset.url)\n",
" assert(check_sha256(path=path) == dataset.sha256)\n",
" assert check_sha256(path=path) == dataset.sha256\n",
"pprint_table(dataframe, \"misc\")"
]
},
@@ -523,10 +537,10 @@
],
"source": [
"for dataset in dataframe.loc[dataframe[\"folder\"] == \"highres\"].itertuples():\n",
" path = f\"{dataset.folder}/{dataset.filename}\" #path to download the file to\n",
" path = f\"{dataset.folder}/{dataset.filename}\" # path to download the file to\n",
" if not os.path.exists(path=path):\n",
" download_to_path(path=path, url=dataset.url)\n",
" assert(check_sha256(path=path) == dataset.sha256)\n",
" assert check_sha256(path=path) == dataset.sha256\n",
"pprint_table(dataframe, \"highres\")"
]
},
@@ -555,7 +569,9 @@
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def ascii_to_xyz(pipeline_file: str) -> pd.DataFrame:\n",
@@ -654,7 +670,7 @@
"source": [
"xyz_dict = {}\n",
"for pf in sorted(glob.glob(\"highres/*.json\")):\n",
" print(f\"Processing {pf} pipeline\", end=' ... ')\n",
" print(f\"Processing {pf} pipeline\", end=\" ... \")\n",
" name = os.path.splitext(os.path.basename(pf))[0]\n",
" xyz_dict[name] = ascii_to_xyz(pipeline_file=pf)\n",
" print(f\"{len(xyz_dict[name])} datapoints\")"
@@ -672,15 +688,17 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def get_region(xyz_data: pd.DataFrame) -> str:\n",
" \"\"\"\n",
" Gets the bounding box region of an xyz pandas.DataFrame in string\n",
" format xmin/xmax/ymin/ymax rounded to 5 decimal places.\n",
" Used for the -R 'region of interest' parameter in GMT.\n",
" \n",
"\n",
" >>> xyz_data = pd.DataFrame(np.random.RandomState(seed=42).rand(30).reshape(10, 3))\n",
" >>> get_region(xyz_data=xyz_data)\n",
" '0.05808/0.83244/0.02058/0.95071'\n",
@@ -693,7 +711,9 @@
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def xyz_to_grid(\n",
@@ -773,10 +793,12 @@
"source": [
"grid_dict = {}\n",
"for name in xyz_dict.keys():\n",
" print(f\"Gridding {name}\", end=' ... ')\n",
" print(f\"Gridding {name}\", end=\" ... \")\n",
" xyz_data = xyz_dict[name]\n",
" region = get_region(xyz_data)\n",
" grid_dict[name] = xyz_to_grid(xyz_data=xyz_data, region=region, outfile=f\"highres/{name}.nc\")\n",
" grid_dict[name] = xyz_to_grid(\n",
" xyz_data=xyz_data, region=region, outfile=f\"highres/{name}.nc\"\n",
" )\n",
" print(f\"done! {grid_dict[name].to_array().shape}\")"
]
},
@@ -807,11 +829,15 @@
],
"source": [
"grids = sorted(glob.glob(\"highres/*.nc\"))\n",
"fig, axarr = plt.subplots(nrows=1+((len(grids)-1)//3), ncols=3, squeeze=False, figsize=(15,15))\n",
"fig, axarr = plt.subplots(\n",
" nrows=1 + ((len(grids) - 1) // 3), ncols=3, squeeze=False, figsize=(15, 15)\n",
")\n",
"\n",
"for i, grid in enumerate(grids):\n",
" with rasterio.open(grid) as raster_source:\n",
" rasterio.plot.show(source=raster_source, cmap='BrBG_r', ax=axarr[i//3,i%3], title=grid)"
" rasterio.plot.show(\n",
" source=raster_source, cmap=\"BrBG_r\", ax=axarr[i // 3, i % 3], title=grid\n",
" )"
]
},
{
@@ -831,7 +857,9 @@
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def get_window_bounds(\n",
@@ -841,7 +869,7 @@
" Reads in a raster and finds tiles for them according to a stepped moving window.\n",
" Returns a list of bounding box coordinates corresponding to a tile that looks like\n",
" [(minx, miny, maxx, maxy), (minx, miny, maxx, maxy), ...]\n",
" \n",
"\n",
" >>> xr.DataArray(\n",
" ... data=np.zeros(shape=(36, 32)),\n",
" ... coords={\"x\": np.arange(1, 37), \"y\": np.arange(1, 33)},\n",
@@ -918,7 +946,7 @@
"filepaths = sorted([g for g in glob.glob(\"highres/*.nc\") if g != \"highres/2007tx.nc\"])\n",
"window_bounds = [get_window_bounds(filepath=grid) for grid in filepaths]\n",
"window_bounds_concat = np.concatenate([w for w in window_bounds]).tolist()\n",
"print(f'Total number of tiles: {len(window_bounds_concat)}')"
"print(f\"Total number of tiles: {len(window_bounds_concat)}\")"
]
},
{
Expand Down Expand Up @@ -948,7 +976,9 @@
}
],
"source": [
"shapely.geometry.MultiPolygon([shapely.geometry.box(*bound) for bound in window_bounds_concat])"
"shapely.geometry.MultiPolygon(\n",
" [shapely.geometry.box(*bound) for bound in window_bounds_concat]\n",
")"
]
},
{
Expand All @@ -961,7 +991,9 @@
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def selective_tile(\n",
@@ -1044,7 +1076,10 @@
}
],
"source": [
"hireses = [selective_tile(filepath=f, window_bounds=w) for f, w in zip(filepaths, window_bounds)]\n",
"hireses = [\n",
" selective_tile(filepath=f, window_bounds=w)\n",
" for f, w in zip(filepaths, window_bounds)\n",
"]\n",
"hires = np.concatenate(hireses)\n",
"print(hires.shape, hires.dtype)"
]
Expand All @@ -1071,7 +1106,9 @@
}
],
"source": [
"lores = selective_tile(filepath=\"lowres/bedmap2_bed.tif\", window_bounds=window_bounds_concat)\n",
"lores = selective_tile(\n",
" filepath=\"lowres/bedmap2_bed.tif\", window_bounds=window_bounds_concat\n",
")\n",
"print(lores.shape, lores.dtype)"
]
},
@@ -1097,7 +1134,9 @@
}
],
"source": [
"rema = selective_tile(filepath=\"misc/REMA_200m_dem_filled.tif\", window_bounds=window_bounds_concat)\n",
"rema = selective_tile(\n",
" filepath=\"misc/REMA_200m_dem_filled.tif\", window_bounds=window_bounds_concat\n",
")\n",
"print(rema.shape, rema.dtype)"
]
},
@@ -1116,7 +1155,11 @@
}
],
"source": [
"measuresiceflow = selective_tile(filepath=\"misc/MEaSUREs_IceFlowSpeed_450m.tif\", window_bounds=window_bounds_concat, out_shape=(16,16))\n",
"measuresiceflow = selective_tile(\n",
" filepath=\"misc/MEaSUREs_IceFlowSpeed_450m.tif\",\n",
" window_bounds=window_bounds_concat,\n",
" out_shape=(16, 16),\n",
")\n",
"print(measuresiceflow.shape, measuresiceflow.dtype)"
]
},
@@ -1189,10 +1232,10 @@
"metadata": {},
"outputs": [],
"source": [
"quilt.build(package='weiji14/deepbedmap/model/train/W1_data', path=rema)\n",
"quilt.build(package='weiji14/deepbedmap/model/train/W2_data', path=measuresiceflow)\n",
"quilt.build(package='weiji14/deepbedmap/model/train/X_data', path=lores)\n",
"quilt.build(package='weiji14/deepbedmap/model/train/Y_data', path=hires)"
"quilt.build(package=\"weiji14/deepbedmap/model/train/W1_data\", path=rema)\n",
"quilt.build(package=\"weiji14/deepbedmap/model/train/W2_data\", path=measuresiceflow)\n",
"quilt.build(package=\"weiji14/deepbedmap/model/train/X_data\", path=lores)\n",
"quilt.build(package=\"weiji14/deepbedmap/model/train/Y_data\", path=hires)"
]
},
{
@@ -1254,12 +1297,13 @@
}
],
"source": [
"quilt.push(package='weiji14/deepbedmap', is_public=True)"
"quilt.push(package=\"weiji14/deepbedmap\", is_public=True)"
]
}
],
"metadata": {
"jupytext": {
"formats": "ipynb,py:percent",
"text_representation": {
"extension": ".py",
"format_name": "percent",