🎨 Auto lint jupyter notebooks with black
Changes single quotes to double quotes, enforces a maximum line length of 88 for code, strips trailing whitespace, etc. See https://black.readthedocs.io/en/stable/the_black_code_style.html

Changes were made by 1) adding "formats": "ipynb,py:percent" to the Jupyter notebook's metadata, 2) saving the .py files using python-black v0.6.0 inside the Atom editor (https://github.com/mikehoyio/atom-python-black), which automatically triggers the linting of both the .py and .ipynb scripts, and 3) using git add --patch *.ipynb to stage only the changed code and not the cell outputs (which disappear...), with some fine tuning in GitKraken.
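
For reference, step 1 can also be reproduced outside of Jupyter by editing the notebook JSON directly. The sketch below is a minimal, hypothetical helper (stdlib only, function name and in-place rewrite are my own; the actual commit added the metadata through the Jupyter/Atom workflow described above). The "jupytext"/"formats" keys it writes match the notebook metadata visible near the bottom of the diff.

    import json

    def add_jupytext_pairing(notebook_path: str = "data_prep.ipynb") -> None:
        # Pair the notebook with a py:percent script by writing the jupytext
        # "formats" key into the notebook-level metadata (step 1 above).
        with open(notebook_path) as f:
            nb = json.load(f)
        metadata = nb.setdefault("metadata", {})
        metadata.setdefault("jupytext", {})["formats"] = "ipynb,py:percent"
        with open(notebook_path, "w") as f:
            json.dump(nb, f, indent=1)
            f.write("\n")

    add_jupytext_pairing()

Once the pairing is in place, saving the .py representation after python-black has reformatted it (step 2) lets the black-formatted source flow back into the .ipynb, which is what produces the quote and line-length changes in the diff below; git add --patch then keeps the unchanged cell outputs out of the commit (step 3).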
weiji14 committed Nov 26, 2018
1 parent: 20fcb35 · commit: 8487e88
Showing 4 changed files with 808 additions and 504 deletions.
162 changes: 103 additions & 59 deletions data_prep.ipynb
@@ -60,12 +60,12 @@
"import skimage.util.shape\n",
"import xarray as xr\n",
"\n",
"print('Python :', sys.version.split('\\n')[0])\n",
"print('GMT :', gmt.__version__)\n",
"print('Numpy :', np.__version__)\n",
"print('Rasterio :', rasterio.__version__)\n",
"print('Scikit-image :', skimage.__version__)\n",
"print('Xarray :', xr.__version__)"
"print(\"Python :\", sys.version.split(\"\\n\")[0])\n",
"print(\"GMT :\", gmt.__version__)\n",
"print(\"Numpy :\", np.__version__)\n",
"print(\"Rasterio :\", rasterio.__version__)\n",
"print(\"Scikit-image :\", skimage.__version__)\n",
"print(\"Xarray :\", xr.__version__)"
]
},
{
@@ -78,23 +78,25 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def download_to_path(path:str, url:str):\n",
"def download_to_path(path: str, url: str):\n",
" r\"\"\"\n",
" Download from a url to a path\n",
" \n",
"\n",
" >>> download_to_path(path=\"highres/Data_20171204_02.csv\",\n",
" ... url=\"https://data.cresis.ku.edu/data/rds/2017_Antarctica_Basler/csv_good/Data_20171204_02.csv\")\n",
" <Response [200]>\n",
" >>> open(\"highres/Data_20171204_02.csv\").readlines()\n",
" ['LAT,LON,UTCTIMESOD,THICK,ELEVATION,FRAME,SURFACE,BOTTOM,QUALITY\\n']\n",
" >>> os.remove(path=\"highres/Data_20171204_02.csv\")\n",
" \"\"\"\n",
" #if not os.path.exists(path=path):\n",
" # if not os.path.exists(path=path):\n",
" r = requests.get(url=url, stream=True)\n",
" with open(file=path, mode='wb') as fd:\n",
" with open(file=path, mode=\"wb\") as fd:\n",
" for chunk in r.iter_content(chunk_size=1024):\n",
" fd.write(chunk)\n",
" return r"
@@ -103,13 +105,15 @@
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def check_sha256(path: str):\n",
" \"\"\"\n",
" Returns SHA256 checksum of a file\n",
" \n",
"\n",
" >>> download_to_path(path=\"highres/Data_20171204_02.csv\",\n",
" ... url=\"https://data.cresis.ku.edu/data/rds/2017_Antarctica_Basler/csv_good/Data_20171204_02.csv\")\n",
" <Response [200]>\n",
@@ -133,7 +137,9 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def parse_datalist(\n",
@@ -184,33 +190,41 @@
"source": [
"# Code to autogenerate README.md files in highres/lowres/misc folders from data_list.yml\n",
"columns = [\"Filename\", \"Location\", \"Resolution\", \"Literature Citation\", \"Data Citation\"]\n",
"for folder, md_header in [(\"lowres\", \"Low Resolution\"),\n",
" (\"highres\", \"High Resolution\"),\n",
" (\"misc\", \"Miscellaneous\")]:\n",
" assert(folder in pd.unique(dataframe[\"folder\"]))\n",
"for folder, md_header in [\n",
" (\"lowres\", \"Low Resolution\"),\n",
" (\"highres\", \"High Resolution\"),\n",
" (\"misc\", \"Miscellaneous\"),\n",
"]:\n",
" assert folder in pd.unique(dataframe[\"folder\"])\n",
" md_name = f\"{folder}/README.md\"\n",
" \n",
"\n",
" with open(file=md_name, mode=\"w\") as md_file:\n",
" md_file.write(f\"# {md_header} Antarctic datasets\\n\\n\")\n",
" md_file.write(\"Note: This file was automatically generated from \")\n",
" md_file.write(\"[data_list.yml](/data_list.yml) using \")\n",
" md_file.write(\"[data_prep.ipynb](/data_prep.ipynb)\\n\\n\")\n",
" \n",
"\n",
" md_table = pd.DataFrame(columns=columns)\n",
" md_table.loc[0] = ['---','---','---','---','---']\n",
" \n",
" md_table.loc[0] = [\"---\", \"---\", \"---\", \"---\", \"---\"]\n",
"\n",
" keydf = dataframe.groupby(\"citekey\").aggregate(lambda x: set(x).pop())\n",
" for row in keydf.loc[keydf[\"folder\"] == folder].itertuples():\n",
" filecount = len(dataframe[dataframe[\"citekey\"] == row.Index])\n",
" extension = os.path.splitext(row.filename)[-1]\n",
" row_dict = {\"Filename\": row.filename if filecount == 1 else f\"{filecount} *{extension} files\",\n",
" \"Location\": row.location,\n",
" \"Resolution\": row.resolution,\n",
" \"Literature Citation\": f\"[{row.Index}]({row.doi_literature})\",\n",
" \"Data Citation\": f\"[DOI]({row.doi_dataset})\" if row.doi_dataset!='nan' else None}\n",
" row_dict = {\n",
" \"Filename\": row.filename\n",
" if filecount == 1\n",
" else f\"{filecount} *{extension} files\",\n",
" \"Location\": row.location,\n",
" \"Resolution\": row.resolution,\n",
" \"Literature Citation\": f\"[{row.Index}]({row.doi_literature})\",\n",
" \"Data Citation\": f\"[DOI]({row.doi_dataset})\"\n",
" if row.doi_dataset != \"nan\"\n",
" else None,\n",
" }\n",
" md_table = md_table.append(other=row_dict, ignore_index=True)\n",
" \n",
" md_table.to_csv(path_or_buf=md_name, mode='a', sep=\"|\", index=False)"
"\n",
" md_table.to_csv(path_or_buf=md_name, mode=\"a\", sep=\"|\", index=False)"
]
},
{
@@ -258,10 +272,10 @@
],
"source": [
"for dataset in dataframe.loc[dataframe[\"folder\"] == \"lowres\"].itertuples():\n",
" path = f\"{dataset.folder}/{dataset.filename}\" #path to download the file to\n",
" path = f\"{dataset.folder}/{dataset.filename}\" # path to download the file to\n",
" if not os.path.exists(path=path):\n",
" download_to_path(path=path, url=dataset.url)\n",
" assert(check_sha256(path=path) == dataset.sha256)\n",
" assert check_sha256(path=path) == dataset.sha256\n",
"pprint_table(dataframe, \"lowres\")"
]
},
@@ -285,7 +299,7 @@
],
"source": [
"with rasterio.open(\"lowres/bedmap2_bed.tif\") as raster_source:\n",
" rasterio.plot.show(source=raster_source, cmap='BrBG_r')"
" rasterio.plot.show(source=raster_source, cmap=\"BrBG_r\")"
]
},
{
@@ -339,10 +353,10 @@
],
"source": [
"for dataset in dataframe.loc[dataframe[\"folder\"] == \"misc\"].itertuples():\n",
" path = f\"{dataset.folder}/{dataset.filename}\" #path to download the file to\n",
" path = f\"{dataset.folder}/{dataset.filename}\" # path to download the file to\n",
" if not os.path.exists(path=path):\n",
" download_to_path(path=path, url=dataset.url)\n",
" assert(check_sha256(path=path) == dataset.sha256)\n",
" assert check_sha256(path=path) == dataset.sha256\n",
"pprint_table(dataframe, \"misc\")"
]
},
@@ -523,10 +537,10 @@
],
"source": [
"for dataset in dataframe.loc[dataframe[\"folder\"] == \"highres\"].itertuples():\n",
" path = f\"{dataset.folder}/{dataset.filename}\" #path to download the file to\n",
" path = f\"{dataset.folder}/{dataset.filename}\" # path to download the file to\n",
" if not os.path.exists(path=path):\n",
" download_to_path(path=path, url=dataset.url)\n",
" assert(check_sha256(path=path) == dataset.sha256)\n",
" assert check_sha256(path=path) == dataset.sha256\n",
"pprint_table(dataframe, \"highres\")"
]
},
@@ -555,7 +569,9 @@
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def ascii_to_xyz(pipeline_file: str) -> pd.DataFrame:\n",
@@ -654,7 +670,7 @@
"source": [
"xyz_dict = {}\n",
"for pf in sorted(glob.glob(\"highres/*.json\")):\n",
" print(f\"Processing {pf} pipeline\", end=' ... ')\n",
" print(f\"Processing {pf} pipeline\", end=\" ... \")\n",
" name = os.path.splitext(os.path.basename(pf))[0]\n",
" xyz_dict[name] = ascii_to_xyz(pipeline_file=pf)\n",
" print(f\"{len(xyz_dict[name])} datapoints\")"
@@ -672,15 +688,17 @@
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def get_region(xyz_data: pd.DataFrame) -> str:\n",
" \"\"\"\n",
" Gets the bounding box region of an xyz pandas.DataFrame in string\n",
" format xmin/xmax/ymin/ymax rounded to 5 decimal places.\n",
" Used for the -R 'region of interest' parameter in GMT.\n",
" \n",
"\n",
" >>> xyz_data = pd.DataFrame(np.random.RandomState(seed=42).rand(30).reshape(10, 3))\n",
" >>> get_region(xyz_data=xyz_data)\n",
" '0.05808/0.83244/0.02058/0.95071'\n",
@@ -693,7 +711,9 @@
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def xyz_to_grid(\n",
@@ -773,10 +793,12 @@
"source": [
"grid_dict = {}\n",
"for name in xyz_dict.keys():\n",
" print(f\"Gridding {name}\", end=' ... ')\n",
" print(f\"Gridding {name}\", end=\" ... \")\n",
" xyz_data = xyz_dict[name]\n",
" region = get_region(xyz_data)\n",
" grid_dict[name] = xyz_to_grid(xyz_data=xyz_data, region=region, outfile=f\"highres/{name}.nc\")\n",
" grid_dict[name] = xyz_to_grid(\n",
" xyz_data=xyz_data, region=region, outfile=f\"highres/{name}.nc\"\n",
" )\n",
" print(f\"done! {grid_dict[name].to_array().shape}\")"
]
},
@@ -807,11 +829,15 @@
],
"source": [
"grids = sorted(glob.glob(\"highres/*.nc\"))\n",
"fig, axarr = plt.subplots(nrows=1+((len(grids)-1)//3), ncols=3, squeeze=False, figsize=(15,15))\n",
"fig, axarr = plt.subplots(\n",
" nrows=1 + ((len(grids) - 1) // 3), ncols=3, squeeze=False, figsize=(15, 15)\n",
")\n",
"\n",
"for i, grid in enumerate(grids):\n",
" with rasterio.open(grid) as raster_source:\n",
" rasterio.plot.show(source=raster_source, cmap='BrBG_r', ax=axarr[i//3,i%3], title=grid)"
" rasterio.plot.show(\n",
" source=raster_source, cmap=\"BrBG_r\", ax=axarr[i // 3, i % 3], title=grid\n",
" )"
]
},
{
@@ -831,7 +857,9 @@
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def get_window_bounds(\n",
@@ -841,7 +869,7 @@
" Reads in a raster and finds tiles for them according to a stepped moving window.\n",
" Returns a list of bounding box coordinates corresponding to a tile that looks like\n",
" [(minx, miny, maxx, maxy), (minx, miny, maxx, maxy), ...]\n",
" \n",
"\n",
" >>> xr.DataArray(\n",
" ... data=np.zeros(shape=(36, 32)),\n",
" ... coords={\"x\": np.arange(1, 37), \"y\": np.arange(1, 33)},\n",
@@ -918,7 +946,7 @@
"filepaths = sorted([g for g in glob.glob(\"highres/*.nc\") if g != \"highres/2007tx.nc\"])\n",
"window_bounds = [get_window_bounds(filepath=grid) for grid in filepaths]\n",
"window_bounds_concat = np.concatenate([w for w in window_bounds]).tolist()\n",
"print(f'Total number of tiles: {len(window_bounds_concat)}')"
"print(f\"Total number of tiles: {len(window_bounds_concat)}\")"
]
},
{
Expand Down Expand Up @@ -948,7 +976,9 @@
}
],
"source": [
"shapely.geometry.MultiPolygon([shapely.geometry.box(*bound) for bound in window_bounds_concat])"
"shapely.geometry.MultiPolygon(\n",
" [shapely.geometry.box(*bound) for bound in window_bounds_concat]\n",
")"
]
},
{
Expand All @@ -961,7 +991,9 @@
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def selective_tile(\n",
@@ -1044,7 +1076,10 @@
}
],
"source": [
"hireses = [selective_tile(filepath=f, window_bounds=w) for f, w in zip(filepaths, window_bounds)]\n",
"hireses = [\n",
" selective_tile(filepath=f, window_bounds=w)\n",
" for f, w in zip(filepaths, window_bounds)\n",
"]\n",
"hires = np.concatenate(hireses)\n",
"print(hires.shape, hires.dtype)"
]
Expand All @@ -1071,7 +1106,9 @@
}
],
"source": [
"lores = selective_tile(filepath=\"lowres/bedmap2_bed.tif\", window_bounds=window_bounds_concat)\n",
"lores = selective_tile(\n",
" filepath=\"lowres/bedmap2_bed.tif\", window_bounds=window_bounds_concat\n",
")\n",
"print(lores.shape, lores.dtype)"
]
},
@@ -1097,7 +1134,9 @@
}
],
"source": [
"rema = selective_tile(filepath=\"misc/REMA_200m_dem_filled.tif\", window_bounds=window_bounds_concat)\n",
"rema = selective_tile(\n",
" filepath=\"misc/REMA_200m_dem_filled.tif\", window_bounds=window_bounds_concat\n",
")\n",
"print(rema.shape, rema.dtype)"
]
},
@@ -1116,7 +1155,11 @@
}
],
"source": [
"measuresiceflow = selective_tile(filepath=\"misc/MEaSUREs_IceFlowSpeed_450m.tif\", window_bounds=window_bounds_concat, out_shape=(16,16))\n",
"measuresiceflow = selective_tile(\n",
" filepath=\"misc/MEaSUREs_IceFlowSpeed_450m.tif\",\n",
" window_bounds=window_bounds_concat,\n",
" out_shape=(16, 16),\n",
")\n",
"print(measuresiceflow.shape, measuresiceflow.dtype)"
]
},
@@ -1189,10 +1232,10 @@
"metadata": {},
"outputs": [],
"source": [
"quilt.build(package='weiji14/deepbedmap/model/train/W1_data', path=rema)\n",
"quilt.build(package='weiji14/deepbedmap/model/train/W2_data', path=measuresiceflow)\n",
"quilt.build(package='weiji14/deepbedmap/model/train/X_data', path=lores)\n",
"quilt.build(package='weiji14/deepbedmap/model/train/Y_data', path=hires)"
"quilt.build(package=\"weiji14/deepbedmap/model/train/W1_data\", path=rema)\n",
"quilt.build(package=\"weiji14/deepbedmap/model/train/W2_data\", path=measuresiceflow)\n",
"quilt.build(package=\"weiji14/deepbedmap/model/train/X_data\", path=lores)\n",
"quilt.build(package=\"weiji14/deepbedmap/model/train/Y_data\", path=hires)"
]
},
{
@@ -1254,12 +1297,13 @@
}
],
"source": [
"quilt.push(package='weiji14/deepbedmap', is_public=True)"
"quilt.push(package=\"weiji14/deepbedmap\", is_public=True)"
]
}
],
"metadata": {
"jupytext": {
"formats": "ipynb,py:percent",
"text_representation": {
"extension": ".py",
"format_name": "percent",