Skip to content

Commit

Permalink
Add Tiles module
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexeyPechnikov committed Mar 4, 2024
1 parent 183f402 commit f114508
Show file tree
Hide file tree
Showing 2 changed files with 333 additions and 1 deletion.
330 changes: 330 additions & 0 deletions pygmtsar/pygmtsar/Tiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,330 @@
# ----------------------------------------------------------------------------
# PyGMTSAR
#
# This file is part of the PyGMTSAR project: https://github.com/mobigroup/gmtsar
#
# Copyright (c) 2024, Alexey Pechnikov
#
# Licensed under the BSD 3-Clause License (see LICENSE for details)
# ----------------------------------------------------------------------------
from .datagrid import datagrid
from .tqdm_joblib import tqdm_joblib

class Tiles(datagrid, tqdm_joblib):
    """
    Download global 1x1 degree raster tiles (land masks and DEMs) and merge
    them into a single Xarray grid cropped to a requested geometry extent.
    """

    # HTTP request timeout in seconds for a single tile download.
    http_timeout = 30
    # Typical browser headers; some tile mirrors reject requests without them.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://www.example.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'DNT': '1',
    }

    def _download_tile(self, base_url, path_id, tile_id, archive, filetype, product, lon, lat, debug=False):
        """
        Download a single 1x1 degree tile and return it as an xarray.DataArray.

        Parameters
        ----------
        base_url : str
            URL template; may use the {product}, {product1}, {resolution},
            {SN2}, {WE3} placeholders.
        path_id : str
            Path template appended to the base URL.
        tile_id : str
            Tile file name template.
        archive : str or None
            Archive extension (e.g. 'gz') when tiles are compressed, or None.
        filetype : str
            Tile format, 'netcdf' or 'geotif'.
        product : str
            Resolution code: '1s'/'01s' (30m) or '3s'/'03s' (90m).
        lon, lat : int
            Integer degrees of the tile's lower-left corner.
        debug : bool, optional
            Print debugging information. Default is False.

        Returns
        -------
        xarray.DataArray or None
            The tile grid with 'lat'/'lon' coordinates, or None when the tile
            cannot be fetched (offshore tiles are missing by design).
        """
        import rioxarray as rio
        import xarray as xr
        import requests
        import os
        import tempfile
        import gzip
        import io

        # leading digit of the product code: 1 or 3
        product1 = int(product[0])
        if product in ['1s', '01s']:
            resolution = '30'
        elif product in ['3s', '03s']:
            resolution = '90'
        else:
            resolution = ''
        # tile naming pieces like N38/S01 and E038/W003
        SN2 = f'{"S" if lat<0 else "N"}{abs(lat):02}'
        WE3 = f'{"W" if lon<0 else "E"}{abs(lon):03}'
        params = {
            'product': product,
            'product1': product1,
            'resolution': resolution,
            'SN2': SN2,
            'WE3': WE3
        }
        if debug:
            print ('DEBUG _download_tile: params', params)
        url = base_url.format(**params)
        path = path_id.format(**params)
        tile = tile_id.format(**params)
        tile_url = f'{url}/{path}/{tile}'
        if archive is not None and len(archive)>0:
            # remove .gz or other archive extension from the local file name
            ext_len = len(archive) + 1
            tile_filename = os.path.join(tempfile.gettempdir(), tile[:-ext_len])
        else:
            tile_filename = os.path.join(tempfile.gettempdir(), tile)
        if debug:
            print ('DEBUG _download_tile: tile_url', tile_url)
            print ('DEBUG _download_tile: tile_filename', tile_filename)
        try:
            if archive is not None and len(archive)>0:
                with requests.get(tile_url, headers=self.headers, timeout=self.http_timeout) as response:
                    response.raise_for_status()
                    # response.content holds the whole gzip-compressed tile;
                    # wrap it in BytesIO so gzip can treat it as a file object
                    compressed_content = io.BytesIO(response.content)
                    # decompress and write the content to a temporary file
                    with gzip.GzipFile(fileobj=compressed_content) as gz:
                        with open(tile_filename, 'wb') as f:
                            f.write(gz.read())
            else:
                with requests.get(tile_url, headers=self.headers, timeout=self.http_timeout) as response:
                    response.raise_for_status()
                    with open(tile_filename, 'wb') as f:
                        f.write(response.content)
            if filetype == 'netcdf':
                # close the dataset explicitly so the temporary file can be
                # removed in the finally block (required on Windows)
                with xr.open_dataarray(tile_filename) as da:
                    tile = da.load()
                # drop attributes to allow merging tiles without metadata conflicts
                tile.attrs = {}
                for coord in tile.coords:
                    tile.coords[coord].attrs = {}
            elif filetype == 'geotif':
                with rio.open_rasterio(tile_filename) as raster:
                    tile = raster\
                        .squeeze(drop=True)\
                        .rename({'y': 'lat', 'x': 'lon'})\
                        .drop_vars('spatial_ref')\
                        .load()
                # normalize to ascending latitude order for merging
                if tile.lat.diff('lat')[0].item() < 0:
                    tile = tile.reindex(lat=tile.lat[::-1])
            else:
                raise Exception(f'ERROR:: unknown tiles file type {filetype}. Expected "netcdf" or "geotif".')
        except requests.exceptions.RequestException as e:
            # offshore tiles are missed by design
            print(f'Request error for {tile_id}: {e}')
            tile = None
        except Exception as e:
            print(e)
            raise
        finally:
            # always clean up the temporary tile file
            if os.path.exists(tile_filename):
                os.remove(tile_filename)
        return tile

    def _download(self, base_url, path_id, tile_id, archive, filetype,
                  geometry, filename=None, product='1s',
                  n_jobs=4, joblib_backend='loky', skip_exist=True, debug=False):
        """
        Download all tiles covering a geometry, merge them and optionally save to NetCDF.

        Parameters
        ----------
        base_url, path_id, tile_id, archive, filetype : see _download_tile
        geometry : object
            Shapely geometry, GeoPandas object or Xarray object defining the extent.
        filename : str or None, optional
            Output NetCDF file name; when None the merged grid is returned instead.
        product : str, optional
            Resolution code, '1s' or '3s'. Default is '1s'.
        n_jobs : int, optional
            Number of concurrent download jobs. Default is 4.
        joblib_backend : str or None, optional
            Joblib backend name; None disables parallel processing. Default is 'loky'.
        skip_exist : bool, optional
            Skip downloading when the target file already exists. Default is True.
        debug : bool, optional
            Print debugging information. Default is False.

        Returns
        -------
        xarray.DataArray or None
            Merged grid when filename is None; otherwise the grid is written
            to disk and None is returned. None is also returned when no tiles
            exist (fully offshore area) or the target file already exists.
        """
        import xarray as xr
        import numpy as np
        from tqdm.auto import tqdm
        import joblib
        import os

        assert product in ['1s', '3s'], f'ERROR: product name is invalid: {product}. Expected names are "1s", "3s".'

        if filename is not None and os.path.exists(filename) and skip_exist:
            print ('NOTE: Target file exists, ignore the command. Use "skip_exist=False" or omit the filename to allow new downloading.')
            return

        bounds = self.get_bounds(geometry)

        # floor both corners would produce 4 tiles for cases like (39.5, 39.5, 40.0, 40.0);
        # floor/ceil below produces a single tile for such degenerate extents
        lon_start, lat_start, lon_end, lat_end = bounds
        left = np.floor(min(lon_start, lon_end))
        right = np.ceil(max(lon_start, lon_end)) - 1
        bottom = np.floor(min(lat_start, lat_end))
        top = np.ceil(max(lat_start, lat_end)) - 1
        left, right = int(left), int(right)
        bottom, top = int(bottom), int(top)

        if joblib_backend is None or n_jobs is None:
            # sequential downloading without joblib, useful for debugging
            tile_xarrays = []
            with self.tqdm_joblib(tqdm(desc='Tiles Downloading', total=(right-left+1)*(top-bottom+1))) as pbar:
                for x in range(left, right + 1):
                    for y in range(bottom, top + 1):
                        tile = self._download_tile(base_url, path_id, tile_id, archive, filetype, product, x, y, debug)
                        tile_xarrays.append(tile)
                        pbar.update(1)
        else:
            with self.tqdm_joblib(tqdm(desc='Tiles Parallel Downloading', total=(right-left+1)*(top-bottom+1))) as progress_bar:
                tile_xarrays = joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(joblib.delayed(self._download_tile)\
                                        (base_url, path_id, tile_id, archive, filetype, product, x, y, debug)\
                                        for x in range(left, right + 1) for y in range(bottom, top + 1))

        # missing (offshore) tiles are returned as None and dropped here
        tile_xarrays = [tile for tile in tile_xarrays if tile is not None]
        if len(tile_xarrays) == 0:
            return
        da = xr.combine_by_coords(tile_xarrays)
        if isinstance(da, xr.Dataset):
            da = da[list(da.data_vars)[0]]
        # crop to the exact geometry extent
        da = da.sel(lat=slice(bounds[1], bounds[3]), lon=slice(bounds[0], bounds[2]))

        if filename is not None:
            if os.path.exists(filename):
                os.remove(filename)
            encoding = {'z': self._compression(da.shape)}
            da.rename('z').to_netcdf(filename, encoding=encoding, engine=self.netcdf_engine)
        else:
            return da

    def download_landmask(self, geometry, filename=None, product='1s', skip_exist=True, n_jobs=8, debug=False):
        """
        Download and merge gzipped NetCDF land mask tiles.

        Examples
        --------
        from pygmtsar import Tiles
        landmask = Tiles().download_landmask(AOI)
        landmask.plot.imshow()
        Tiles().download_landmask(S1.scan_slc(DATADIR), 'landmask.nc')
        """
        return self._download(
            base_url       = 'https://gmtlandmask.pechnikov.workers.dev/{product}',
            #base_url      = 'https://alexeypechnikov.github.io/gmtlandmask/{product}',
            path_id        = '{SN2}',
            tile_id        = '{SN2}{WE3}.nc.gz',
            archive        = 'gz',
            filetype       = 'netcdf',
            geometry       = geometry,
            filename       = filename,
            product        = product,
            n_jobs         = n_jobs,
            joblib_backend = 'loky',
            skip_exist     = skip_exist,
            debug          = debug)

    # https://copernicus-dem-90m.s3.eu-central-1.amazonaws.com
    # https://copernicus-dem-30m.s3.amazonaws.com/Copernicus_DSM_COG_10_N38_00_E038_00_DEM/Copernicus_DSM_COG_10_N38_00_E038_00_DEM.tif
    def download_dem_glo(self, geometry, filename=None, product='1s', skip_exist=True, n_jobs=8, debug=False):
        """
        Download Copernicus GLO-30/GLO-90 Digital Elevation Model tiles from open AWS storage.

        Examples
        --------
        from pygmtsar import Tiles
        dem = Tiles().download_dem_glo(AOI)
        dem.plot.imshow()
        Tiles().download_dem_glo(S1.scan_slc(DATADIR), 'dem_glo.nc')
        """
        assert product in ['1s', '3s'], f'ERROR: product name is invalid: {product} for Copernicus GLO DEM. Expected names are "1s", "3s".'
        return self._download(
            #base_url  = 'https://copernicus-dem-{resolution}m.s3.amazonaws.com',
            base_url   = 'https://copernicusdem{product1}s.pechnikov.workers.dev',
            path_id    = 'Copernicus_DSM_COG_{product1}0_{SN2}_00_{WE3}_00_DEM',
            tile_id    = 'Copernicus_DSM_COG_{product1}0_{SN2}_00_{WE3}_00_DEM.tif',
            archive    = None,
            filetype   = 'geotif',
            geometry   = geometry,
            filename   = filename,
            product    = product,
            skip_exist = skip_exist,
            n_jobs     = n_jobs,
            debug      = debug)

    # aws s3 ls --no-sign-request s3://elevation-tiles-prod/skadi/
    # https://s3.amazonaws.com/elevation-tiles-prod/skadi/N20/N20E000.hgt.gz
    def download_dem_srtm(self, geometry, filename=None, product='1s', skip_exist=True, n_jobs=8, debug=False):
        """
        Download NASA SRTM Digital Elevation Model tiles from open AWS storage.

        Examples
        --------
        from pygmtsar import Tiles
        dem = Tiles().download_dem_srtm(AOI)
        dem.plot.imshow()
        Tiles().download_dem_srtm(S1.scan_slc(DATADIR), 'dem_srtm.nc')
        """
        assert product in ['1s'], 'ERROR: only product="1s" is supported for NASA SRTM DEM.'
        return self._download(
            #base_url  = 'https://s3.amazonaws.com/elevation-tiles-prod/skadi',
            base_url   = 'https://srtmdem1s.pechnikov.workers.dev',
            path_id    = '{SN2}',
            tile_id    = '{SN2}{WE3}.hgt.gz',
            archive    = 'gz',
            filetype   = 'geotif',
            geometry   = geometry,
            filename   = filename,
            product    = product,
            skip_exist = skip_exist,
            n_jobs     = n_jobs,
            debug      = debug)

    def download_dem(self, geometry, filename=None, product='1s', provider='GLO', skip_exist=True, n_jobs=8, debug=False):
        """
        Downloads Copernicus or SRTM Digital Elevation Model (DEM) at 30m or 90m resolution.

        Parameters
        ----------
        geometry : object
            The Shapely geometry or GeoPandas object or Xarray object for which to download the DEM.
        filename : str or None, optional
            The name of the file to save the downloaded DEM. If None, the merged grid is returned. Default is None.
        product : str, optional
            The resolution of the DEM. Valid options are '1s' (for 30m) and '3s' (for 90m). Default is '1s'.
        provider : str, optional
            The provider of the DEM. Valid options are 'GLO' (for Copernicus Global Land Service) and 'SRTM'. Default is 'GLO'.
        skip_exist : bool, optional
            If True, skips the download if the file already exists. Default is True.
        n_jobs : int, optional
            The number of concurrent download jobs. Default is 8.
        debug : bool, optional
            If True, prints debugging information. Default is False.

        Returns
        -------
        xarray.DataArray or None
            The merged DEM grid when filename is None; otherwise the grid is
            saved to the file and None is returned.

        Raises
        ------
        AssertionError
            If an invalid provider or product is specified.

        Examples
        --------
        Tiles().download_dem(AOI, filename='dem.tif', product='1s', provider='GLO', n_jobs=4, skip_exist=True, debug=True)
        """
        kwargs = dict(geometry   = geometry,
                      filename   = filename,
                      product    = product,
                      skip_exist = skip_exist,
                      n_jobs     = n_jobs,
                      debug      = debug)
        assert provider in ['GLO', 'SRTM'], f'ERROR: provider name is invalid: {provider}. Expected names are "GLO", "SRTM".'
        if provider == 'SRTM':
            return self.download_dem_srtm(**kwargs)
        if provider == 'GLO':
            return self.download_dem_glo(**kwargs)
4 changes: 3 additions & 1 deletion pygmtsar/pygmtsar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#
# Licensed under the BSD 3-Clause License (see LICENSE for details)
# ----------------------------------------------------------------------------
__version__ = '2024.2.21.post8'
__version__ = '2024.2.21.post9'

# unified progress indicators
from .tqdm_joblib import tqdm_joblib
Expand All @@ -26,6 +26,8 @@
from .AWS import AWS
from .ESA import ESA
from .GMT import GMT
# tiles downloading
from .Tiles import Tiles
# XYZ map tiles downloading
from .XYZTiles import XYZTiles
# morphology and other helper functions
Expand Down

0 comments on commit f114508

Please sign in to comment.