diff --git a/src/tile2net/misc/attrs.py b/src/tile2net/misc/attrs.py index 0016bbd..50311b8 100644 --- a/src/tile2net/misc/attrs.py +++ b/src/tile2net/misc/attrs.py @@ -1,61 +1,47 @@ from __future__ import annotations +import numpy as np +from numpy import ndarray + +from geopandas import GeoDataFrame, GeoSeries +from pandas import Series, DataFrame + import copy +from geopandas import GeoDataFrame, GeoSeries +from pandas import Series, DataFrame import functools import os from _weakrefset import WeakSet -from typing import Callable, Type +from typing import Callable, Type, Union +import numpy as np import pandas as pd import logging import pickle from pandas.core.generic import NDFrame +# todo: force fget if keyerror when setting column or subframe -__all__ = ['attr', 'subframe'] - -class AttrMeta(type): - def __get__(cls, func: Callable) -> Callable: - @functools.wraps(func) - def wrapper(self: attr, instance, owner): - # saves if not on disk, loads if on disk - self.instance = instance - self.owner = type(instance) - if instance is None: - return self - - if not self: - res = self.fget(instance) - self.__set__(instance, res) - - res = func(self, instance, owner) - return res - - return wrapper +__all__ = ['attr', 'subframe', 'column'] - def __set__(cls, func: Callable) -> Callable: - @functools.wraps(func) - def wrapper(self: attr, instance: NDFrame, value): - self.instance = instance - self.owner = type(instance) - func(self, instance, value) - return wrapper - - def __delete__(cls, func: Callable) -> Callable: - @functools.wraps(func) - def wrapper(self: attr, instance: NDFrame): - self.instance = instance - self.owner = type(instance) - func(self, instance) - - return wrapper - -class attr(metaclass=AttrMeta): +class attr: instance: NDFrame owner: Type[NDFrame] - - def __init__(self, func=None, *, pickle=False, log=False, **kwargs): + _validate = None + + def __init__( + self, + func=None, + *, + pickle=False, + log=False, + auto=False, + step=False, + 
constant=False, + init=False, + **kwargs + ): """ Caches an attribute across Frame instances. @@ -71,24 +57,67 @@ def __init__(self, func=None, *, pickle=False, log=False, **kwargs): self.fget = func self.pickle = pickle self.log = log + self.auto = auto + self.step = step + self.constant = constant + self.init = init - def __set__(self, instance: NDFrame, value): + def set(self, instance, value): instance.attrs[self.name] = value - def __get__(self, instance: NDFrame, owner): + def __set__(self, instance: NDFrame, value): + self.instance = instance + self.owner = type(instance) + if self._validate is not None: + value = self._validate(instance, value) + self.set(instance, value) + + @classmethod + def validate(cls, func): + self = cls() + self._validate = func + return self + + def get(self, instance, owner): return instance.attrs[self.name] - def __delete__(self, instance: NDFrame): - del instance.attrs[self.name] + def __get__(self, instance: NDFrame, owner): + self.instance = instance + self.owner = owner + if instance is None: + return self + if not self: + res = self.fget(instance) + self.__set__(instance, res) + return self.get(instance, owner) - def __call__(self, func): + def delete(self, instance): + if self.constant: + raise UserWarning( + f'Deleting constant attribute {self.name}' + ) + try: + del instance.attrs[self.name] + except KeyError: + pass + + def __delete__(self, instance: NDFrame): + self.instance = instance + self.owner = type(instance) + self.delete(instance) + + def __call__(self, obj): + if isinstance(obj, NDFrame): + # attr is being used as unbound method + return self.__get__(obj, type(obj)) + # attr is wrapping a method if ( - not callable(func) - and hasattr(func, '__get__') + not callable(obj) + and hasattr(obj, '__get__') ): - # func is some sort of property - func = func.__get__ - self.fget = func + # obj is some sort of property + obj = obj.__get__ + self.fget = obj return self def __set_name__(self, owner, name): @@ -97,15 
+126,19 @@ def __set_name__(self, owner, name): def fget(func): @functools.wraps(func) def wrapper(instance): + # todo: we need to be able to redo fget if index difference if ( self.pickle and os.path.exists(path := self.__fspath__()) ): + raise NotImplementedError if self.log: logging.info(f'Loading {self.name} from {path}') with open(path, 'rb') as f: res = pickle.load(f) else: + if self.log: + logging.info(f'Calculating {self.name}') fget = func if ( not callable(fget) @@ -114,6 +147,8 @@ def wrapper(instance): # func is some sort of property fget = fget.__get__ res = fget(instance) + if self.step: + object.__setattr__(res, self.name, res) if self.pickle: path = self.__fspath__() os.makedirs( @@ -134,19 +169,60 @@ def wrapper(instance): self.fget = fget(self.fget) functools.update_wrapper(self, self.fget) - def __new__(cls, *args, **kwargs): - cls = copy.copy(cls) - meta = type(cls) - # creates a copy of the class so that descriptor methods are wrapped on the outermost - # but the wrapper is not inherited - for wrapper in '__get__ __set__ __delete__'.split(): - # wrapping this way so that wrapper is always outermost - func = getattr(cls, wrapper) - decorate = getattr(meta, wrapper) - wrapped = decorate(meta, func) - setattr(cls, wrapper, wrapped) - self = super().__new__(cls) - return self + if self.auto: + @functools.wraps(owner.__init__) + def init(*args, **kwargs): + owner.__init__(*args, **kwargs) + self.__get__(*args, **kwargs) + + owner.__init__ = init + + # def __new__(cls, *args, **kwargs): + # def __get__(func: Callable) -> Callable: + # # @functools.wraps(func) + # def wrapper(self: attr, instance, owner): + # # saves if not on disk, loads if on disk + # self.instance = instance + # self.owner = type(instance) + # if instance is None: + # return self + # + # if not self: + # res = self.fget(instance) + # self.__set__(instance, res) + # + # res = func(self, instance, owner) + # return res + # + # return wrapper + # + # def __set__(func: Callable) -> 
Callable: + # # @functools.wraps(func) + # def wrapper(self: attr, instance: NDFrame, value): + # self.instance = instance + # self.owner = type(instance) + # func(self, instance, value) + # + # return wrapper + # + # def __delete__(func: Callable) -> Callable: + # # @functools.wraps(func) + # def wrapper(self: attr, instance: NDFrame): + # self.instance = instance + # self.owner = type(instance) + # func(self, instance) + # + # return wrapper + # + # # cls.__get__ = __get__(cls.__get__) + # # cls.__set__ = __set__(cls.__set__) + # # cls.__delete__ = __delete__(cls.__delete__) + # self = super().__new__(cls) + # self.__get__ = __get__(self.__get__) + # self.__set__ = __set__(self.__set__) + # self.__delete__ = __delete__(self.__delete__) + # return self + # def __repr__(self): try: @@ -169,6 +245,7 @@ def __fspath__(self): self.name + '.pkl' ) + class subframe(attr): """ Caches a Frame or Series as an attribute of the parent frame, preserving it across @@ -180,13 +257,50 @@ class subframe(attr): :param kwargs: Parameters to pass to NDFrame.reindex """ - def __get__(self, instance, owner) -> NDFrame: - res: NDFrame = super().__get__(instance, owner) - if self not in self.aligned: - res = res.reindex(instance.index.unique()) - self.__set__(instance, res) - self.aligned.add(self) - return res + # def __get__(self, instance, owner) -> NDFrame: + # res: NDFrame = super().__get__(instance, owner) + # if self not in self.aligned: + # # res = res.reindex(instance.index.unique(), ) + # loc = instance.index.unique().intersection(res.index) + # res = res.loc[loc] + # self.__set__(instance, res) + # self.aligned.add(self) + # return res + + # def __get__(self, instance: DataFrame, owner) -> Union[subframe, NDFrame]: + # unaligned = super().__get__(instance, owner) + # if unaligned is self: + # return self + # if self not in self.aligned: + # # loc = instance.index.unique().intersection() + # loc = ( + # instance.index + # .unique() + # .intersection(unaligned.index) + # ) + # 
aligned = unaligned.loc[loc] + # self.__set__(instance, aligned) + # # self.aligned.add(self) + + def get(self, instance, owner): + unaligned = super().get(instance, owner) + if unaligned is self: + return self + if self.name not in instance.__dict__: + loc = ( + instance.index + .unique() + .intersection(unaligned.index) + ) + aligned = unaligned.loc[loc] + instance.__dict__[self.name] = aligned + aligned = instance.__dict__[self.name] + return aligned + + def delete(self, instance): + super().delete(instance) + if self.name in instance.__dict__: + del instance.__dict__[self.name] def __set_name__(self, owner, name): def fget(func): @@ -225,9 +339,55 @@ def wrapper(instance): return wrapper self.fget = fget(self.fget) - self.aligned = WeakSet() super().__set_name__(owner, name) + +class column(attr): + def set(self, instance, value): + value.__class__.mro() + # Series.mro() + # ndarray.mro() + # if isinstance(value, ndarray): + # assert len(value) == len(instance) + # elif not ( + # value.index + # .difference(instance.index) + # .empty + # ): + # # todo: perhaps force it to recompute + # raise ValueError('Cannot assign a Series with a different index') + if ( + isinstance(value, Series) + and not value.index.difference(instance.index).empty + ): + raise ValueError('Cannot assign a Series with a different index') + + + instance[self.name] = value + + def delete(self, instance): + try: + del instance[self.name] + except KeyError: + ... 
+ + + def get(self, instance: DataFrame, owner): + # return instance[self.name] + if self.name in instance.index.names: + return instance.index.get_level_values(self.name) + return instance[self.name] + + def __bool__(self): + # return self.name in self.instance.columns + # noinspection PyTypeChecker + instance: DataFrame = self.instance + if self.name in instance.index.names: + return True + return self.name in instance.columns + + + if __name__ == '__main__': from pandas import DataFrame @@ -251,6 +411,12 @@ def frame(self): print('FRAME') return self.copy() + @column + @property + def col(self): + print('COL') + return np.arange(len(self)) + @property def _constructor(self) -> Callable[..., Test]: return type(self) @@ -259,6 +425,7 @@ def _constructor(self) -> Callable[..., Test]: def prop(self): return True + test = Test({ 'a': [1, 2, 3], 'b': [4, 5, 6], diff --git a/src/tile2net/misc/desc_attrs.py b/src/tile2net/misc/desc_attrs.py index 33c927b..9fbdcb2 100644 --- a/src/tile2net/misc/desc_attrs.py +++ b/src/tile2net/misc/desc_attrs.py @@ -7,6 +7,7 @@ from tile2net.misc.attrs import attr +# noinspection PyTypeChecker class desc_attr(attr): instance: Descriptor owner: Type[Descriptor] @@ -21,13 +22,13 @@ def __set_name__(self, owner, name): self.cache = WeakKeyDictionary() super().__set_name__(owner, name) - def __get__(self, instance: Descriptor, owner): + def get(self, instance, owner): return self.cache[self] - def __set__(self, instance: Descriptor, value): + def set(self, instance, value): self.cache[self] = value - def __delete__(self, instance: Descriptor): + def delete(self, instance): del self.cache[self] def __bool__(self): @@ -35,7 +36,8 @@ def __bool__(self): def __fspath__(self): return os.path.join( - self.instance._artifacts.__fspath__(), + self.instance.artifacts.__fspath__(), + self.instance.name, self.name + '.pkl' ) @@ -53,14 +55,12 @@ class TestDesc(Descriptor): @desc_attr @property def attr(self): - print('attr') return False @desc_subframe 
@property def subframe(self): - print('subframe') - return self._artifacts.copy() + return self.artifacts.copy() class TestFrame(DataFrame): desc = TestDesc() diff --git a/src/tile2net/misc/frame.py b/src/tile2net/misc/frame.py index ed7bd0c..fde5fd4 100644 --- a/src/tile2net/misc/frame.py +++ b/src/tile2net/misc/frame.py @@ -1,4 +1,6 @@ from __future__ import annotations + +import time from typing import Callable, TypeVar from functools import cached_property @@ -7,22 +9,38 @@ from pandas.core.generic import NDFrame from pandas.core.indexing import _LocIndexer, _iLocIndexer - +from tile2net.misc.attrs import attr, column __all__ = ['Frame'] F = TypeVar('F', bound='Frame') + class iLocIndexer(_iLocIndexer): __getitem__: Callable[..., F] + class LocIndexer(_LocIndexer): __getitem__: Callable[..., F] + class FrameMeta(type): ... + # @property + # def frame_attrs(cls) -> dict[str, attr]: + # result: dict[str, attr] = { + # key: value + # for parent in cls.mro() + # if isinstance(parent, FrameMeta) + # for key, value in parent.__dict__.items() + # if isinstance(value, attr) + # } + # return result + class Frame(DataFrame, metaclass=FrameMeta): + + # class Frame(DataFrame): @cached_property def _constructor(self): return type(self) @@ -35,30 +53,56 @@ def loc(self) -> LocIndexer: def iloc(self) -> iLocIndexer: return iLocIndexer('iloc', self) + @attr + @property + def timestamp(self): + return time.time().__int__() + def __hash__(self): # to support caching - return hash(id(self)) + return hash(self.timestamp) def __eq__(self, other): - return self is other + return self.timestamp == other.timestamp - def __init__(self, data, *args, **kwargs): + def __init__(self, data=None, *args, **kwargs): super().__init__(data, *args, **kwargs) if isinstance(data, NDFrame): self.attrs = data.attrs.copy() def __repr__(self): constructor = self._constructor - self._constructor = self.__class__.__base__ - res = super().__repr__() + self._constructor = DataFrame + # res = 
super().__repr__() + res = super(Frame, self).__repr__() self._constructor = constructor return res + def set_axis( + self, + labels, + *, + axis: Axis = 0, + copy: bool | None = None, + ) -> DataFrame: + result = super().set_axis(labels, axis=axis, copy=copy) + return result + + def flush_columns(self): + # del all column methods + cls = self.__class__ + for col in self: + if ( + hasattr(cls, col) + and isinstance(getattr(cls, col), column) + ): + delattr(self, col) + return self + + def mro(cls: FrameMeta) -> list[type]: # prioritize attributes explicitly defined in frame mro = type.mro(cls) mro.remove(Frame) mro.insert(1, Frame) return mro - -FrameMeta.mro = mro diff --git a/src/tile2net/raster/source.py b/src/tile2net/raster/source.py index 35563c5..dbfb5fe 100644 --- a/src/tile2net/raster/source.py +++ b/src/tile2net/raster/source.py @@ -307,16 +307,25 @@ class LosAngeles(ArcGis): # def metadata(cls): # raise NotImplementedError -class WestOregon(ArcGis, init=False): - server = 'https://imagery.oregonexplorer.info/arcgis/rest/services/OSIP_2018/OSIP_2018_WM/ImageServer' - name = 'w_or' - extension = 'jpeg' - keyword = 'Oregon' - # todo: ssl incorrectly configured; come back later - -class EastOregon(ArcGis, init=False): - server = 'https://imagery.oregonexplorer.info/arcgis/rest/services/OSIP_2017/OSIP_2017_WM/ImageServer' - name = 'e_or' +# class WestOregon(ArcGis, init=False): +# class WestOregon(ArcGis): +# server = 'https://imagery.oregonexplorer.info/arcgis/rest/services/OSIP_2018/OSIP_2018_WM/ImageServer' +# name = 'w_or' +# extension = 'jpeg' +# keyword = 'Oregon' +# # todo: ssl incorrectly configured; come back later +# +# # class EastOregon(ArcGis, init=False): +# class EastOregon(ArcGis, init=False): +# +# server = 'https://imagery.oregonexplorer.info/arcgis/rest/services/OSIP_2017/OSIP_2017_WM/ImageServer' +# name = 'e_or' +# extension = 'jpeg' +# keyword = 'Oregon' + +class Oregon(ArcGis): + server = 
'https://imagery.oregonexplorer.info/arcgis/rest/services/OSIP_2022/OSIP_2022_WM/ImageServer' + name = 'or' extension = 'jpeg' keyword = 'Oregon' diff --git a/src/tile2net/raster/tile_utils/geodata_utils.py b/src/tile2net/raster/tile_utils/geodata_utils.py index 23723a5..961f200 100644 --- a/src/tile2net/raster/tile_utils/geodata_utils.py +++ b/src/tile2net/raster/tile_utils/geodata_utils.py @@ -1,4 +1,6 @@ import os +from tile2net.logger import logger +import shapely import geopandas as gpd import pandas as pd import numpy as np @@ -9,249 +11,263 @@ def read_gdf(path): - """ - Read a GeoDataFrame from a file - Parameters - ---------- - path: str - path to the file + """ + Read a GeoDataFrame from a file + Parameters + ---------- + path: str + path to the file - Returns - ------- - gdf: GeoDataFrame - """ - gdf = gpd.read_file(path) - return gdf + Returns + ------- + gdf: GeoDataFrame + """ + gdf = gpd.read_file(path) + return gdf def set_gdf_crs(gdf, crs): - """ - Set the CRS of a GeoDataFrame - Parameters - ---------- - gdf: GeoDataFrame - crs: int - coordinate reference system - - Returns - ------- - gdf: GeoDataFrame - """ - gdf.geometry = gdf.geometry.set_crs(crs) - return gdf + """ + Set the CRS of a GeoDataFrame + Parameters + ---------- + gdf: GeoDataFrame + crs: int + coordinate reference system + + Returns + ------- + gdf: GeoDataFrame + """ + gdf.geometry = gdf.geometry.set_crs(crs) + return gdf def change_crs(gdf, crs): - """ - Change the CRS of a GeoDataFrame - Parameters - ---------- - gdf: GeoDataFrame - crs: int + """ + Change the CRS of a GeoDataFrame + Parameters + ---------- + gdf: GeoDataFrame + crs: int - Returns - ------- - gdf: GeoDataFrame - """ - gdf.geometry = gdf.geometry.to_crs(crs) - return gdf + Returns + ------- + gdf: GeoDataFrame + """ + gdf.geometry = gdf.geometry.to_crs(crs) + return gdf def prepare_spindex(gdf: gpd.GeoDataFrame): - """ - Prepare a GeoDataFrame for spatial indexing - Parameters - ---------- - gdf: GeoDataFrame + 
""" + Prepare a GeoDataFrame for spatial indexing + Parameters + ---------- + gdf: GeoDataFrame - Returns - ------- - spatial index of a GeoDataFrame - """ - return gdf.sindex + Returns + ------- + spatial index of a GeoDataFrame + """ + return gdf.sindex def _reduce_geom_precision(geom, precision=2): - """ - Reduce the precision of a geometry to a given number of decimal places. - Parameters - ---------- - geom: shapely.geometry - precision: int - number of decimal places to round to - - Returns - ------- - geom: shapely.geometry - """ - geojson = mapping(geom) - geojson['coordinates'] = np.round( - np.array(geojson['coordinates']), - precision - ) - return shape(geojson) + """ + Reduce the precision of a geometry to a given number of decimal places. + Parameters + ---------- + geom: shapely.geometry + precision: int + number of decimal places to round to + + Returns + ------- + geom: shapely.geometry + """ + geojson = mapping(geom) + geojson['coordinates'] = np.round( + np.array(geojson['coordinates']), + precision + ) + return shape(geojson) def affine_to_list(affine_obj): - """Convert a :class:`affine.Affine` instance to a list for Shapely.""" - return [affine_obj.a, affine_obj.b, - affine_obj.d, affine_obj.e, - affine_obj.xoff, affine_obj.yoff] + """Convert a :class:`affine.Affine` instance to a list for Shapely.""" + return [affine_obj.a, affine_obj.b, + affine_obj.d, affine_obj.e, + affine_obj.xoff, affine_obj.yoff] def list_to_affine(xform_mat): - """Create an Affine from a list or array-formatted [a, b, d, e, xoff, yoff] - - Arguments - --------- - xform_mat : `list` or :class:`numpy.array` - A `list` of values to convert to an affine object. - - Returns - ------- - aff : :class:`affine.Affine` - An affine transformation object. 
- """ - # first make sure it's not in gdal order - if len(xform_mat) > 6: - xform_mat = xform_mat[0:6] - if rasterio.transform.tastes_like_gdal(xform_mat): - return Affine.from_gdal(*xform_mat) - else: - return Affine(*xform_mat) + """Create an Affine from a list or array-formatted [a, b, d, e, xoff, yoff] + + Arguments + --------- + xform_mat : `list` or :class:`numpy.array` + A `list` of values to convert to an affine object. + + Returns + ------- + aff : :class:`affine.Affine` + An affine transformation object. + """ + # first make sure it's not in gdal order + if len(xform_mat) > 6: + xform_mat = xform_mat[0:6] + if rasterio.transform.tastes_like_gdal(xform_mat): + return Affine.from_gdal(*xform_mat) + else: + return Affine(*xform_mat) def _check_rasterio_im_load(im): - """Check if `im` is already loaded in; if not, load it in.""" - if isinstance(im, str): - return rasterio.open(im) - elif isinstance(im, rasterio.DatasetReader): - return im - else: - raise ValueError( - "{} is not an accepted image format for rasterio.".format(im) - ) + """Check if `im` is already loaded in; if not, load it in.""" + if isinstance(im, str): + return rasterio.open(im) + elif isinstance(im, rasterio.DatasetReader): + return im + else: + raise ValueError( + "{} is not an accepted image format for rasterio.".format(im) + ) def _check_skimage_im_load(im): - """Check if `im` is already loaded in; if not, load it in.""" - if isinstance(im, str): - return skimage.io.imread(im) - elif isinstance(im, np.ndarray): - return im - else: - raise ValueError( - "{} is not an accepted image format for scikit-image.".format(im) - ) + """Check if `im` is already loaded in; if not, load it in.""" + if isinstance(im, str): + return skimage.io.imread(im) + elif isinstance(im, np.ndarray): + return im + else: + raise ValueError( + "{} is not an accepted image format for scikit-image.".format(im) + ) def prepare_class_gdf(polys, class_name) -> object: - """ - separates the polygons of each class, given 
the keyboard (sidewalk, crosswalk, road) - Args: - polys (geodataframe): the dataframe containing the polygons of all classes - class_name(str): the class label, sidewalk, crosswalk, road + """ + separates the polygons of each class, given the keyword (sidewalk, crosswalk, road) + Args: + polys (geodataframe): the dataframe containing the polygons of all classes + class_name(str): the class label, sidewalk, crosswalk, road - Returns: - class specific GeoDataFrame in metric projection - """ + Returns: + class specific GeoDataFrame in metric projection + """ - nt = polys[polys.f_type == f'{class_name}'].copy() - nt.geometry = nt.geometry.to_crs(3857) - return nt + nt = polys[polys.f_type == f'{class_name}'].copy() + nt.geometry = nt.geometry.to_crs(3857) + return nt def prepare_gdf(gdf, **cols): - """ - Filter a GeoDataFrame based on a set of columns and values - Parameters - ---------- - gdf: GeoDataFrame - cols: dict - {column_name: value} - - Returns - ------- - f_gdf: GeoDataFrame - """ - # TODO: Add other operations like ! - k = list(cols.keys())[0] - print(f'k, {k}', f'cols {len(cols[k])}') - if isinstance(cols[k], list): - f_gdf = gdf[gdf[k].isin(cols[k])] - else: - f_gdf = gdf[gdf[k]==cols[k]] - return f_gdf + """ + Filter a GeoDataFrame based on a set of columns and values + Parameters + ---------- + gdf: GeoDataFrame + cols: dict + {column_name: value} + + Returns + ------- + f_gdf: GeoDataFrame + """ + # TODO: Add other operations like ! + k = list(cols.keys())[0] + print(f'k, {k}', f'cols {len(cols[k])}') + if isinstance(cols[k], list): + f_gdf = gdf[gdf[k].isin(cols[k])] + else: + f_gdf = gdf[gdf[k] == cols[k]] + return f_gdf def read_dataframe(src_path, geo=True, cols=None): - """ - Args: - src_path: - geo: if True, will create GeoDataFrame - cols: optional. 
Name of specific columns to be read - Returns: - """ - if geo: - if cols: - df = gpd.read_file(src_path, usecols=cols) - else: - df = gpd.read_file(src_path) - else: - if cols: - df = pd.read_csv(src_path, usecols=cols) - else: - df = pd.DataFrame(src_path) - return df - - -def unary_multi(gdf): - """ - handles the errors with multipolygon - """ - if gdf.unary_union.type == 'MultiPolygon': - gdf_uni = gpd.GeoDataFrame(geometry=gpd.GeoSeries([geom for geom in gdf.unary_union.geoms])) - else: - gdf_uni = gpd.GeoDataFrame(geometry=gpd.GeoSeries(gdf.unary_union)) - return gdf_uni + """ + Args: + src_path: + geo: if True, will create GeoDataFrame + cols: optional. Name of specific columns to be read + Returns: + """ + if geo: + if cols: + df = gpd.read_file(src_path, usecols=cols) + else: + df = gpd.read_file(src_path) + else: + if cols: + df = pd.read_csv(src_path, usecols=cols) + else: + df = pd.DataFrame(src_path) + return df + + +# def unary_multi(gdf): +# """ +# handles the errors with multipolygon +# """ +# if gdf.unary_union.type == 'MultiPolygon': +# gdf_uni = gpd.GeoDataFrame(geometry=gpd.GeoSeries([geom for geom in gdf.unary_union.geoms])) +# else: +# gdf_uni = gpd.GeoDataFrame(geometry=gpd.GeoSeries(gdf.unary_union)) +# return gdf_uni + +def unary_multi(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + # handles the errors with multipolygon + loc = ~gdf.is_valid.values + logger.warning(f'Number of invalid geometries: {loc.sum()} out of {len(gdf)}') + gdf.geometry.loc[loc] = shapely.make_valid(gdf.geometry.loc[loc]) + result = ( + gdf + # dissolve overlapping geometries + .dissolve() + # explode multipart geometries + .explode() + ) + return result def buffer_union(gdf, buff, simp1, simp2): - """ - buffer and union the polygons in a GeoDataFrame - Parameters - ---------- - gdf: GeoDataFrame - buff: float - buffer distance - simp1: float - simplification tolerance for the buffer - simp2: float - simplification tolerance for the union - - Returns - ------- - 
gdf_uni: GeoDataFrame - """ - gdf.geometry = gdf.geometry.buffer(buff, join_style=2, cap_style=3) - gdf.geometry = gdf.simplify(simp1) - gdf_uni = unary_multi(gdf) - gdf_uni.geometry = gdf_uni.geometry.set_crs(3857) - gdf_uni.geometry = gdf_uni.geometry.simplify(simp2) - return gdf_uni + """ + buffer and union the polygons in a GeoDataFrame + Parameters + ---------- + gdf: GeoDataFrame + buff: float + buffer distance + simp1: float + simplification tolerance for the buffer + simp2: float + simplification tolerance for the union + Returns + ------- + gdf_uni: GeoDataFrame + """ + gdf.geometry = gdf.geometry.buffer(buff, join_style=2, cap_style=3) + gdf.geometry = gdf.simplify(simp1) + gdf_uni = unary_multi(gdf) + gdf_uni.geometry = gdf_uni.geometry.set_crs(3857) + gdf_uni.geometry = gdf_uni.geometry.simplify(simp2) + return gdf_uni -def buffer_union_erode(gdf, buff, erode, simp1, simp2, simp3): - gdf_buff = buffer_union(gdf, buff, simp1, simp2) - gdf_erode = gdf_buff.geometry.buffer(erode, join_style=2, cap_style=3) - gdf_uni = unary_multi(gdf_erode) - gdf_uni.geometry = gdf_uni.geometry.set_crs(3857) - gdf_uni.geometry = gdf_uni.geometry.simplify(simp3) - return gdf_uni +def buffer_union_erode(gdf, buff, erode, simp1, simp2, simp3): + gdf_buff = buffer_union(gdf, buff, simp1, simp2) + gdf_erode = gdf_buff.copy() + gdf_erode.geometry = gdf_buff.geometry.buffer(erode, join_style=2, cap_style=3) + gdf_uni = unary_multi(gdf_erode) + gdf_uni.geometry = gdf_uni.geometry.set_crs(3857) + gdf_uni.geometry = gdf_uni.geometry.simplify(simp3) + return gdf_uni def to_metric(gdf, crs=3857): - """Converts a GeoDataFrame to metric (3857) coordinate + """Converts a GeoDataFrame to metric (3857) coordinate Parameters ---------- gdf : GeoDataFrame @@ -263,130 +279,130 @@ def to_metric(gdf, crs=3857): GeoDataFrame GeoDataFrame of polygons in metric coordinate system """ - gdf.to_crs(crs, inplace=True) - return gdf + gdf.to_crs(crs, inplace=True) + return gdf def geo2geodf(geo_lst): - 
""" - Converts a list of shapely geometries to a GeoDataFrame - Parameters - ---------- - geo_lst: list + """ + Converts a list of shapely geometries to a GeoDataFrame + Parameters + ---------- + geo_lst: list - Returns - ------- - gdf: GeoDataFrame + Returns + ------- + gdf: GeoDataFrame - """ - gdf = gpd.GeoDataFrame(geometry=geo_lst) - return gdf + """ + gdf = gpd.GeoDataFrame(geometry=geo_lst) + return gdf def merge_dfs(gdf1, gdf2, crs=4326): - """ - merges two dataframes with the results of segmentation (three classes) - Parameters - ---------- - gdf1: GeoDataFrame - gdf2: GeoDataFrame - crs: int + """ + merges two dataframes with the results of segmentation (three classes) + Parameters + ---------- + gdf1: GeoDataFrame + gdf2: GeoDataFrame + crs: int - Returns - ------- + Returns + ------- - """ - if gdf1.crs!=gdf2.crs: - gdf1.to_crs(crs, inplace=True) - gdf2.to_crs(crs, inplace=True) + """ + if gdf1.crs != gdf2.crs: + gdf1.to_crs(crs, inplace=True) + gdf2.to_crs(crs, inplace=True) - df1sw = prepare_class_gdf(gdf1, 'sidewalk') - df1cw = prepare_class_gdf(gdf1, 'crosswalk') - df1rd = prepare_class_gdf(gdf1, 'road') + df1sw = prepare_class_gdf(gdf1, 'sidewalk') + df1cw = prepare_class_gdf(gdf1, 'crosswalk') + df1rd = prepare_class_gdf(gdf1, 'road') - df2sw = prepare_class_gdf(gdf2, 'sidewalk') - df2cw = prepare_class_gdf(gdf2, 'crosswalk') - df2rd = prepare_class_gdf(gdf2, 'road') + df2sw = prepare_class_gdf(gdf2, 'sidewalk') + df2cw = prepare_class_gdf(gdf2, 'crosswalk') + df2rd = prepare_class_gdf(gdf2, 'road') - concsw = pd.concat([df1sw, df2sw]) - conccw = pd.concat([df1cw, df2cw]) - concrd = pd.concat([df1rd, df2rd]) + concsw = pd.concat([df1sw, df2sw]) + conccw = pd.concat([df1cw, df2cw]) + concrd = pd.concat([df1rd, df2rd]) - unionsw = unary_multi(concsw) - unionsw = unionsw.explode().reset_index(drop=True) - unionsw.geometry = unionsw.geometry.set_crs(crs) - unionsw['f_type'] = 'sidewalk' + unionsw = unary_multi(concsw) + unionsw = 
unionsw.explode().reset_index(drop=True) + unionsw.geometry = unionsw.geometry.set_crs(crs) + unionsw['f_type'] = 'sidewalk' - unioncw = unary_multi(conccw) + unioncw = unary_multi(conccw) - unioncw.geometry = unioncw.geometry.set_crs(crs) - unioncw['f_type'] = 'crosswalk' + unioncw.geometry = unioncw.geometry.set_crs(crs) + unioncw['f_type'] = 'crosswalk' - unionrd = unary_multi(concrd) + unionrd = unary_multi(concrd) - unionrd.geometry = unionrd.geometry.set_crs(crs) - unionrd['f_type'] = 'road' + unionrd.geometry = unionrd.geometry.set_crs(crs) + unionrd['f_type'] = 'road' - merged = pd.concat([unionrd, unionsw, unioncw]) - merged.geometry = merged.geometry.set_crs(crs) + merged = pd.concat([unionrd, unionsw, unioncw]) + merged.geometry = merged.geometry.set_crs(crs) - return merged + return merged def create_stats(gdf): - """ + """ - Parameters - ---------- - gdf: GeoDataFrame + Parameters + ---------- + gdf: GeoDataFrame - Returns - ------- + Returns + ------- - """ - cgdf = gdf.copy() - cgdf['primeter'] = cgdf.length - cgdf['area'] = cgdf.area - cgdf['ar_pratio'] = cgdf.area/cgdf.length - # get the summary statics of the polygons - ss = cgdf.quantile([0.25, 0.5, 0.75]) - return ss, cgdf + """ + cgdf = gdf.copy() + cgdf['primeter'] = cgdf.length + cgdf['area'] = cgdf.area + cgdf['ar_pratio'] = cgdf.area / cgdf.length + # get the summary statistics of the polygons + ss = cgdf.quantile([0.25, 0.5, 0.75]) + return ss, cgdf def buff_dfs(gdf): - """ - union and buffer the polygons of each class separately, - to create continuous polygons and merge them into one GeoDataFrame. + """ + union and buffer the polygons of each class separately, + to create continuous polygons and merge them into one GeoDataFrame. 
- Parameters - ---------- - gdf: GeoDataFrame - Polygon dataframes with three classes in metric coordinate system - crs: int + Parameters + ---------- + gdf: GeoDataFrame + Polygon dataframes with three classes in metric coordinate system + crs: int - Returns - ------- - GeoDataFrame: - merged GeoDataFrame of the three classes - """ + Returns + ------- + GeoDataFrame: + merged GeoDataFrame of the three classes + """ - gdf.geometry = gdf.simplify(0.2) - dfsw = prepare_class_gdf(gdf, 'sidewalk') - dfcw = prepare_class_gdf(gdf, 'crosswalk') - dfrd = prepare_class_gdf(gdf, 'road') + gdf.geometry = gdf.simplify(0.2) + dfsw = prepare_class_gdf(gdf, 'sidewalk') + dfcw = prepare_class_gdf(gdf, 'crosswalk') + dfrd = prepare_class_gdf(gdf, 'road') - buffersw = buffer_union_erode(dfsw, 0.3, -0.3, 0.2, 0.3, 0.3) + buffersw = buffer_union_erode(dfsw, 0.3, -0.3, 0.2, 0.3, 0.3) - buffersw['f_type'] = 'sidewalk' + buffersw['f_type'] = 'sidewalk' - buffercw = buffer_union_erode(dfcw, 0.3, -0.25, 0.2, 0.3, 0.3) + buffercw = buffer_union_erode(dfcw, 0.3, -0.25, 0.2, 0.3, 0.3) - buffercw['f_type'] = 'crosswalk' + buffercw['f_type'] = 'crosswalk' - bufferrd = buffer_union_erode(dfrd, 0.4, -0.4, 0.2, 0.3, 0.3) - bufferrd['f_type'] = 'road' + bufferrd = buffer_union_erode(dfrd, 0.4, -0.4, 0.2, 0.3, 0.3) + bufferrd['f_type'] = 'road' - merged = pd.concat([buffercw, buffersw, bufferrd]) - merged.geometry = merged.geometry.set_crs(gdf.crs) + merged = pd.concat([buffercw, buffersw, bufferrd]) + merged.geometry = merged.geometry.set_crs(gdf.crs) - return merged + return merged